In [1]:
# nb_black extension left disabled:
# %load_ext nb_black
# Enable IPython's autoreload extension in mode 2, which re-imports modules
# before each cell runs so that edits to imported project code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

### Imports

In [2]:
import os
import cv2
import sys
import tifffile
import numpy as np
import pandas as pd

from tqdm.notebook import tqdm
from collections import Counter
from matplotlib import pyplot as plt

# Put the sibling ../code/ directory on the import path so the project modules
# imported in the next cell can be resolved. The path is relative to the
# notebook's working directory.
sys.path.append("../code/")

In [3]:
from data.dataset import load_image
from utils.rle import *
from params import *

### Preparation

In [4]:
# Integer downscaling factor: image and mask dimensions are floor-divided by
# this value in the loop below, and it is embedded in the output names.
FACTOR = 4

In [5]:
# Output directory for the downscaled images, e.g. "<DATA_PATH>train_4/".
# Built by plain string concatenation, so DATA_PATH is expected to end with a
# path separator — TODO confirm in params.
out_dir = DATA_PATH + f"train_{FACTOR}/"

In [6]:
# Load the training annotations and index them by image id, so that
# iterating with iterrows() yields the id as the row label.
df_masks = pd.read_csv(DATA_PATH + "train.csv").set_index("id")

In [7]:
# Create the output directory, including any missing parent directories.
# exist_ok=True makes the cell safe to re-run and removes the gap between
# checking for the directory and creating it.
os.makedirs(out_dir, exist_ok=True)

### Loop: downscale images and re-encode masks

In [31]:
masks = {}

for index, encs in tqdm(df_masks.iterrows(), total=len(df_masks)):
    # Load the full-resolution TIFF for this id and decode the row's encoding
    # into a mask; enc2mask receives the size as (shape[1], shape[0]).
    img = load_image(os.path.join(TIFF_PATH, index + ".tiff"))
    full_size = (img.shape[1], img.shape[0])
    mask = enc2mask(encs, full_size)

    # Shrink the image by FACTOR along both axes using area interpolation.
    img_size = (img.shape[1] // FACTOR, img.shape[0] // FACTOR)
    img = cv2.resize(img, img_size, interpolation=cv2.INTER_AREA)

    # Shrink the mask the same way, but with nearest-neighbour sampling so the
    # output only contains values already present in the mask.
    mask_size = (mask.shape[1] // FACTOR, mask.shape[0] // FACTOR)
    mask = cv2.resize(mask, mask_size, interpolation=cv2.INTER_NEAREST)

    # Re-encode the downscaled mask and keep it under the image id.
    rle = mask2enc(mask)
    masks[index] = rle

# Writing the resized image to out_dir is currently disabled:
#     tifffile.imsave(out_dir + f"{index}.tiff", img)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=8.0), HTML(value='')))




In [52]:
# Turn the {id: encoding} mapping into a two-column table ("id", "encoding").
# NOTE: this rebinds df_masks; the ids now live in the "id" column and the
# frame gets a default integer index instead of being indexed by id.
df_masks = (
    pd.DataFrame.from_dict(masks)
    .T.reset_index()
    .rename(columns={0: "encoding", "index": "id"})
)

In [53]:
# Display the table of re-encoded masks (one row per image id).
df_masks

Unnamed: 0,id,encoding
0,2f6ecfcdf,18509325 4 18509331 6 18517140 20 18524955 28 ...
1,aaa6a05cc,1938681 15 1943301 19 1947920 24 1952540 28 19...
2,cb2d976f4,4887497 1 4896226 11 4904955 21 4913684 31 492...
3,0486052bb,6358179 2 6364624 4 6371069 7 6377513 10 63775...
4,e79de561c,470499 21 474544 23 478588 25 482632 28 486677...
5,095bf7a1f,7092936 6 7102474 35 7112013 39 7121551 44 713...
6,54f2eec69,7795325 37 7802932 41 7810540 44 7818147 49 78...
7,1e2425f28,3097395 8 3104086 17 3110776 26 3117470 30 312...


In [55]:
# Write the downscaled encodings next to the original data, without the
# integer row index.
# NOTE(review): the directory is hard-coded as "../input/" while the other
# paths in this notebook are built from DATA_PATH — confirm they are the same.
df_masks.to_csv(f"../input/train_{FACTOR}.csv", index=False)

## LAB Stats

In [8]:
from data.transforms import get_lab_stats

In [10]:
stats_dic = {}

# The image ids are the index of the frame loaded from train.csv, but they
# move to an "id" column (with a plain integer index) once df_masks has been
# rebuilt from the rescaled encodings. Handle both layouts so this cell works
# on a top-to-bottom run as well as on a fresh kernel; iterating the integer
# index would make `index + ".tiff"` raise a TypeError.
image_ids = df_masks["id"] if "id" in df_masks.columns else df_masks.index

for index in tqdm(image_ids, total=len(image_ids)):
    # Read the full-resolution image; the mask encoding is not needed here.
    img = load_image(os.path.join(TIFF_PATH, index + ".tiff"))

    # Per-image statistics as returned by get_lab_stats, keyed by image id.
    stats = get_lab_stats(img)
    stats_dic[index] = stats

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=8.0), HTML(value='')))




In [11]:
# Display the collected statistics; each id maps to a pair of 3-element arrays
# (presumably per-channel mean and standard deviation — confirm against
# get_lab_stats).
stats_dic

{'2f6ecfcdf': (array([145.15535701, 137.99020286, 118.54792884]),
  array([89.94705177, 14.23873667, 13.47610648])),
 'aaa6a05cc': (array([160.37612878, 143.02428181, 114.15084724]),
  array([75.359951  , 17.53332275, 16.0875504 ])),
 'cb2d976f4': (array([149.2735253 , 140.2772118 , 117.25825769]),
  array([85.75920275, 16.05684167, 13.85500175])),
 '0486052bb': (array([150.91806562, 137.70516107, 118.47462954]),
  array([87.26843423, 13.69916489, 13.26900443])),
 'e79de561c': (array([157.9340989 , 146.50102633, 111.57854369]),
  array([55.07994246, 15.72905062, 14.46999661])),
 '095bf7a1f': (array([129.58890923, 144.69630213, 116.09852381]),
  array([79.53312429, 18.46779254, 13.0580684 ])),
 '54f2eec69': (array([149.5829189 , 143.24307541, 116.13484479]),
  array([73.73585502, 17.1945404 , 14.00904746])),
 '1e2425f28': (array([133.55921634, 153.74112745, 111.1193993 ]),
  array([66.38893461, 20.50278703, 13.87867939]))}