In [None]:
# INSTALLINGS

!pip install imagecodecs

Collecting imagecodecs
  Downloading imagecodecs-2021.11.20-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (31.0 MB)
[K     |████████████████████████████████| 31.0 MB 196 kB/s 
Installing collected packages: imagecodecs
Successfully installed imagecodecs-2021.11.20


In [None]:
# GPU CHECK: print the GPU, driver and CUDA version visible to this runtime
!nvidia-smi

Sat Feb 19 18:52:27 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   41C    P8     9W /  70W |      0MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
# GOOGLE DRIVE MOUNT AND LIBRARY IMPORTS

# Mount Google Drive at /content/drive (only available inside Google Colab).
# NOTE(review): other cells use the relative path 'drive/MyDrive/...', which
# presumably relies on the working directory being /content — confirm.
from google.colab import drive
drive.mount('/content/drive')

import os
import random
import numpy as np
import plotly.express as px
import imagecodecs

from matplotlib import pyplot as plt
from tqdm import tqdm
from skimage.io import imread, imshow, imsave
from skimage.transform import resize
from skimage.segmentation import mark_boundaries
from scipy import ndimage
from skimage.util import img_as_float,img_as_ubyte, crop
from skimage.morphology import binary_dilation
from keras.utils.np_utils import to_categorical
from skimage import measure, morphology

Mounted at /content/drive


In [None]:
# DATASET UNRAR: LOADING DATASET IN COLAB

!pip install unrar
!unrar x "drive/MyDrive/cytology challenge condivisa/00_DATASET/train.rar"     # unraring training set
!unrar x "drive/MyDrive/cytology challenge condivisa/00_DATASET/validation.rar"   # unraring validation set
!unrar x "drive/MyDrive/cytology challenge condivisa/00_DATASET/test.rar"         # unraring test set

Collecting unrar
  Downloading unrar-0.4-py3-none-any.whl (25 kB)
Installing collected packages: unrar
Successfully installed unrar-0.4

UNRAR 5.50 freeware      Copyright (c) 1993-2017 Alexander Roshal


Extracting from drive/MyDrive/cytology challenge condivisa/00_DATASET/train.rar

Creating    train                                                     OK
Creating    train/images                                              OK
Extracting  train/images/102.tif                                           0%  OK 
Extracting  train/images/104.tif                                           0%  1%  OK 
Extracting  train/images/106.tif                                           1%  OK 
Extracting  train/images/108.tif                                           1%  2%  OK 
Extracting  train/images/109.tif                                           2%  OK 
Extracting  train/images/111.tif                                           2%  3%

In [None]:
# SETTINGS OF THE CURRENT PRE-PROCESSING RUN
# These three values are the ones meant to be edited between runs.

# Name of the current pre-processing (used as the file name of the saved images).
pre_proc_name = 'IL1'

# Target side length in pixels: every image and mask is resized to rsz x rsz.
rsz = 512

# Number of classes chosen for the problem.
NUM_CLASSES = 3

In [None]:
# STORING TRAINING SET IMAGES AND MASKS IN PROPER NDARRAY

# path
tr_IMGS_path = os.path.join('train','images')
tr_MANU_path = os.path.join('train','manual')

# extracting list of images
tr_images = sorted(os.listdir(tr_IMGS_path))

# body
X_tr = np.zeros([len(tr_images),rsz,rsz,3], dtype=np.uint8)
Y_tr = np.zeros([len(tr_images),rsz,rsz], dtype=np.uint8)
for n, id_ in tqdm(enumerate(tr_images), total=len(tr_images)):

    # loading
    img = imread(tr_IMGS_path+'/'+id_) # uint8 stained image
    manu0 = imagecodecs.imread(tr_MANU_path+'/'+id_) # N layers manual segmentations (each layer a different MM cell)

    # "compressing" segmentation annotations on a single layer
    if len(manu0.shape)==2:
      manu = np.copy(manu0[:,:])
    else:
      manu = np.copy(manu0[:,:,0])
      for j in range(1,manu0.shape[2]):
        manu[manu0[:,:,j]==255] = 255
        manu[manu0[:,:,j]==128] = 128
        manu = morphology.area_opening(manu,0.001*manu.shape[0]*manu.shape[1])  # removal of small objects erroneously annotated (single pixels or little spots)
        manu = morphology.area_closing(manu,0.001*manu.shape[0]*manu.shape[1])  # removal of small holes errouneously not annotated

    #resizing
    img = img_as_ubyte(resize(img,[rsz,rsz]))
    manu = img_as_ubyte(resize(manu,[rsz,rsz]))
    manu[manu < 80] = 0
    manu[ (manu >= 80)*(manu <= 175) ] = 1
    manu[manu > 175] = 2

    # actual storage
    X_tr[n] = np.copy(img)
    Y_tr[n] = np.copy(manu)

# Y_tr = to_categorical(Y_tr, num_classes = NUM_CLASSES, dtype='float32')  # conversion to categorical data


# STORING VALIDATION SET IMAGES AND MASKS IN PROPER NDARRAY

# path
vl_IMGS_path = os.path.join('validation','images')
vl_MANU_path = os.path.join('validation','manual')

# extracting list of images
vl_images = sorted(os.listdir(vl_IMGS_path))

# body
X_vl = np.zeros([len(vl_images),rsz,rsz,3], dtype=np.uint8)
Y_vl = np.zeros([len(vl_images),rsz,rsz], dtype=np.uint8)
for n, id_ in tqdm(enumerate(vl_images), total=len(vl_images)):

    # loading
    img = imread(vl_IMGS_path+'/'+id_)
    manu0 = imagecodecs.imread(vl_MANU_path+'/'+id_) # N layers manual segmentations (each layer a different MM cell)

    # "compressing" segmentation annotations on a single layer
    if len(manu0.shape)==2:
      manu = np.copy(manu0[:,:])
    else:
      manu = np.copy(manu0[:,:,0])
      for j in range(1,manu0.shape[2]):
        manu[manu0[:,:,j]==255] = 255
        manu[manu0[:,:,j]==128] = 128
        manu = morphology.area_opening(manu,0.001*manu.shape[0]*manu.shape[1])  # removal of small objects erroneously annotated (single pixels or little spots)
        manu = morphology.area_closing(manu,0.001*manu.shape[0]*manu.shape[1])  # removal of small holes errouneously not annotated

    #resizing
    img = img_as_ubyte(resize(img,[rsz,rsz]))
    manu = img_as_ubyte(resize(manu,[rsz,rsz]))
    manu[manu < 80] = 0
    manu[ (manu >= 80)*(manu <= 175) ] = 1
    manu[manu > 175] = 2

    # actual storage
    X_vl[n] = np.copy(img)
    Y_vl[n] = np.copy(manu)

# Y_vl = to_categorical(Y_vl, num_classes = NUM_CLASSES, dtype='float32')  # conversion to categorical data

%whos

#print(vl_images[0],vl_images[1])

110.tif 113.tif


In [None]:
# SAVING PRE-PROCESSED DATA

# Destination folder on the mounted Drive (np.savez adds the .npz extension).
out_dir = 'drive/MyDrive/cytology challenge condivisa/01_PRE-PROCESSED'

# Resized images of both splits, stored under the name of the current pre-processing.
np.savez(os.path.join(out_dir, pre_proc_name), X_tr=X_tr, X_vl=X_vl)

# Resized annotation masks of both splits, stored under a name that carries the size.
# Comment out the two statements below if annotations of the wanted size already exist.
pre_proc_annotations_name = f'IL1_manual_mask_{rsz}x{rsz}'
np.savez(os.path.join(out_dir, pre_proc_annotations_name), Y_tr=Y_tr, Y_vl=Y_vl)