In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
import os
import sys
import tifffile
import imageio
import glob
import shutil
import glob
import shutil
import time
import cv2

In [2]:
# Install pycocotools into the kernel's own environment (%pip rather than !pip).
# Pinned to 2.0.4, the version the recorded run of this notebook downloaded, so re-runs resolve the same build.
%pip install -q pycocotools==2.0.4

Collecting pycocotools
  Downloading pycocotools-2.0.4.tar.gz (106 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m106.6/106.6 kB[0m [31m273.3 kB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l- \ | / - \ | / - \ | / - done
[?25h  Getting requirements to build wheel ... [?25l- \ | / done
[?25h  Preparing metadata (pyproject.toml) ... [?25l- \ | / done
Building wheels for collected packages: pycocotools
  Building wheel for pycocotools (pyproject.toml) ... [?25l- \ | / - \ | / - \ | done
[?25h  Created wheel for pycocotools: filename=pycocotools-2.0.4-cp37-cp37m-linux_x86_64.whl size=370078 sha256=4ce4a6243508af57c67d801dd45668ba2dc417ea7b466fd327feeb6dcfa35f77
  Stored in directory: /root/.cache/pip/wheels/a3/5f/fa/f011e578cc76e1fc5be8dce30b3eb9fd00f337e744b3bba59b
Successfully built pycocotools
Installing collected packages: pycocotools
Successfully installe

In [3]:
# Dimensions of the original images: SHAPE is ordered (height, width)
HEIGHT, WIDTH = 520, 704
SHAPE = HEIGHT, WIDTH

In [4]:
def rles_to_mask(encs, shape=SHAPE):
    """
    Decodes a list of run-length encodings into a single label mask.

    Each encoding is a whitespace-separated string of ``start length`` pairs,
    where ``start`` is a 1-based index into the flattened (row-major) mask.
    Pixels covered by the m-th encoding (0-based) are set to ``m + 1``; where
    encodings overlap, the later one overwrites the earlier one. Pixels that
    no encoding covers stay 0. A trailing unpaired token is ignored.

    Args:
        encs (list of str): Rles, one per label. NaN entries are skipped.
            A bare float (e.g. the NaN pandas uses for a missing value)
            yields an all-zero mask; a bare string is treated as a
            one-element list.
        shape (tuple [2]): Mask size.

    Returns:
        np array [shape]: Mask.
    """
    img = np.zeros(shape[0] * shape[1], dtype=np.uint)

    # No annotation at all: still honour the documented return shape instead
    # of handing back the flat buffer. isinstance also matches np.float64,
    # which subclasses the builtin float.
    if isinstance(encs, float):
        return img.reshape(shape)

    # A lone string would otherwise be iterated character by character and
    # silently decode to an empty mask.
    if isinstance(encs, str):
        encs = [encs]

    for m, enc in enumerate(encs):
        # `np.float` was only an alias of the builtin float and no longer
        # exists in NumPy >= 1.24, so test against `float` directly.
        if isinstance(enc, float) and np.isnan(enc):
            continue
        tokens = enc.split()
        # Even positions hold the 1-based starts, odd positions the run lengths.
        for start_tok, length_tok in zip(tokens[0::2], tokens[1::2]):
            start = int(start_tok) - 1
            img[start: start + int(length_tok)] = 1 + m
    return img.reshape(shape)

## <font color='#50C878'>**Prepare LIVECell data**</font>

In [5]:
import sys
# Extend the module search path before the import below; the livecell_dataset
# module is presumably located in this folder (verify against the attached inputs).
sys.path.append('../input/sartorius-helper-package')
from livecell_dataset import LiveCell_Dataset

In [6]:
# Path constants for the SHSY5Y part of LIVECell: image folder and train/val annotation files
SHSY5Y_LIVECELL_TRAIN_IMAGE_FOLDER = (
    '../input/sartorius-cell-instance-segmentation/LIVECell_dataset_2021'
    '/images/livecell_train_val_images/SHSY5Y'
)
SHSY5Y_LIVECELL_TRAIN_ANNOTATION_PATH = (
    '../input/sartorius-cell-instance-segmentation/LIVECell_dataset_2021'
    '/annotations/LIVECell_single_cells/shsy5y/livecell_shsy5y_train.json'
)
SHSY5Y_LIVECELL_VAL_ANNOTATION_PATH = (
    '../input/sartorius-cell-instance-segmentation/LIVECell_dataset_2021'
    '/annotations/LIVECell_single_cells/shsy5y/livecell_shsy5y_val.json'
)

In [7]:
# Create the ./train and ./val output folders in the working directory.
# exist_ok=True keeps the cell idempotent: re-running it no longer errors
# because the folders already exist.
for out_dir in ('./train', './val'):
    os.makedirs(out_dir, exist_ok=True)

In [8]:
# Wrap the SHSY5Y images and their train/val annotation files in the helper dataset object
livecell_dataset = LiveCell_Dataset(
    image_folder=SHSY5Y_LIVECELL_TRAIN_IMAGE_FOLDER,
    train_annotation_path=SHSY5Y_LIVECELL_TRAIN_ANNOTATION_PATH,
    val_annotation_path=SHSY5Y_LIVECELL_VAL_ANNOTATION_PATH,
    shape_target=SHAPE,
    shape=SHAPE,
)

# Polygon masks -> rle (must run before the export / DataFrame steps below, as in the original order)
livecell_dataset.convert_polygon_to_rle()

# Write the images of both groups to the working dir, without padding
for group_name in ('train', 'val'):
    livecell_dataset.save_image_to_working_dir(group_name, pad_img=False)

# One LiveCell DataFrame per image group
livecell_train_df = livecell_dataset.create_livecell_dataframe(img_group='train')
livecell_val_df = livecell_dataset.create_livecell_dataframe(img_group='val')

100%|██████████| 165397/165397 [03:36<00:00, 765.40it/s]
100%|██████████| 28199/28199 [00:36<00:00, 764.46it/s]
449it [01:06,  6.73it/s]
79it [00:12,  6.47it/s]
165397it [00:00, 493232.99it/s]
28199it [00:00, 479569.14it/s]


In [9]:
# Archive each output folder recursively (-r) and quietly (-q) as livecell_<folder>.zip
for folder in ('train', 'val'):
    !zip -r -q livecell_{folder}.zip {folder}

In [10]:
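# Optional step, currently disabled: collapse each DataFrame to one row per image id,
# with that image's annotations gathered into a list. While it stays commented out,
# the DataFrames (and the CSVs written below) keep one row per annotation.
#livecell_train_df = livecell_train_df.groupby('id')['annotation'].agg(list).reset_index()
#livecell_val_df = livecell_val_df.groupby('id')['annotation'].agg(list).reset_index()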

In [11]:
# Display the training DataFrame (the recorded output shows columns id, annotation, cell_type)
livecell_train_df

Unnamed: 0,id,annotation,cell_type
0,SHSY5Y_Phase_C10_2_00d04h00m_3,89136 1 89837 5 90540 8 91244 9 91949 9 92653 ...,shsy5y
1,SHSY5Y_Phase_C10_2_00d04h00m_3,185410 2 186114 13 186132 7 186817 29 187520 3...,shsy5y
2,SHSY5Y_Phase_C10_2_00d04h00m_3,183343 2 184044 5 184738 4 184747 7 185441 17 ...,shsy5y
3,SHSY5Y_Phase_C10_2_00d04h00m_3,165739 2 166442 3 167144 5 167847 7 168550 8 1...,shsy5y
4,SHSY5Y_Phase_C10_2_00d04h00m_3,173 8 876 10 1580 10 2283 12 2988 11 3692 11 4...,shsy5y
...,...,...,...
165392,SHSY5Y_Phase_B10_1_00d12h00m_3,25415 7 26115 18 26817 23 27519 28 28222 32 28...,shsy5y
165393,SHSY5Y_Phase_B10_1_00d12h00m_3,20500 6 21203 8 21905 11 22609 11 23312 13 240...,shsy5y
165394,SHSY5Y_Phase_B10_1_00d12h00m_3,80509 4 81211 7 81914 9 82603 2 82617 10 83308...,shsy5y
165395,SHSY5Y_Phase_B10_1_00d12h00m_3,93177 6 93879 10 94581 14 95284 16 95986 20 96...,shsy5y


In [12]:
# Persist the training annotations next to the notebook; the row index is not written
livecell_train_df.to_csv(path_or_buf='./livecell_train.csv', index=False)

In [13]:
# Display the validation DataFrame (the recorded output shows columns id, annotation, cell_type)
livecell_val_df

Unnamed: 0,id,annotation,cell_type
0,SHSY5Y_Phase_B10_2_00d00h00m_4,251701 2 252405 3 253108 4 253800 1 253812 5 2...,shsy5y
1,SHSY5Y_Phase_B10_2_00d00h00m_4,243238 6 243940 9 244642 12 245345 14 246047 1...,shsy5y
2,SHSY5Y_Phase_B10_2_00d00h00m_4,268237 7 268940 9 269643 11 270347 11 271050 1...,shsy5y
3,SHSY5Y_Phase_B10_2_00d00h00m_4,284441 5 285144 7 285847 9 286550 11 287254 12...,shsy5y
4,SHSY5Y_Phase_B10_2_00d00h00m_4,290772 2 291476 4 292179 7 292883 9 293587 11 ...,shsy5y
...,...,...,...
28194,SHSY5Y_Phase_B10_1_02d08h00m_2,222442 2 223144 5 223846 7 224549 9 225252 10 ...,shsy5y
28195,SHSY5Y_Phase_B10_1_02d08h00m_2,427 3 1131 4 1836 5 2540 6 3245 6 3949 7 4654 ...,shsy5y
28196,SHSY5Y_Phase_B10_1_02d08h00m_2,422 4 1124 7 1827 8 2530 10 3234 11 3939 10 46...,shsy5y
28197,SHSY5Y_Phase_B10_1_02d08h00m_2,404 17 1108 16 1813 14 2523 8 3228 6 3933 4 46...,shsy5y


In [14]:
# Persist the validation annotations next to the notebook; the row index is not written
livecell_val_df.to_csv(path_or_buf='./livecell_val.csv', index=False)