In [4]:
# Notebook setup. The nb_black formatter extension is left disabled.
# %load_ext nb_black
# Enable IPython's autoreload extension; mode 2 asks it to re-import modules
# automatically before executing code, so edits to project files are picked up.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Imports

In [5]:
import os
import gc
import cv2
import sys
import tifffile
import numpy as np
import pandas as pd

from tqdm.notebook import tqdm
from collections import Counter
from matplotlib import pyplot as plt

# Add the sibling code directory to the import search path; the project imports
# that follow (data.dataset, utils.rle, params) presumably resolve from there.
sys.path.append("../code/")

In [6]:
from data.dataset import load_image
from utils.rle import *
from params import *

In [7]:
# Integer downscale factor: image and mask dimensions are floor-divided by it,
# and it is embedded in the output folder / CSV names (e.g. train_4/).
FACTOR = 4

### Train

In [8]:
# Destination folder for the downscaled train images.
out_dir = f"{DATA_PATH}train_{FACTOR}/"

In [9]:
# Train annotations, indexed by image id so rows can be iterated as (id, encodings).
train_csv = DATA_PATH + "train.csv"
df_masks = pd.read_csv(train_csv).set_index("id")

In [10]:
# Create the output folder if needed. exist_ok=True replaces the separate
# existence check (no check-then-create race) and makedirs also creates any
# missing parent directories, so the cell is safe to re-run.
os.makedirs(out_dir, exist_ok=True)

In [None]:
# Downscaled encodings keyed by image id; a later cell turns this into a DataFrame.
masks = {}

for index, encs in tqdm(df_masks.iterrows(), total=len(df_masks)):
    # Load the source image and decode its mask; enc2mask is given the image's
    # (width, height).
    img = load_image(os.path.join(TIFF_PATH, index + ".tiff"))
    mask = enc2mask(encs, (img.shape[1], img.shape[0]))

    # cv2.resize expects the target size as (width, height).
    # INTER_AREA: area-based resampling, the usual choice when shrinking images.
    img = cv2.resize(
        img,
        (img.shape[1] // FACTOR, img.shape[0] // FACTOR),
        interpolation=cv2.INTER_AREA,
    )

    # INTER_NEAREST: picks existing pixels only, so no new (blended) mask values
    # are introduced by the resize.
    mask = cv2.resize(
        mask,
        (mask.shape[1] // FACTOR, mask.shape[0] // FACTOR),
        interpolation=cv2.INTER_NEAREST,
    )

    # Re-encode the downscaled mask and remember it under the image id.
    masks[index] = mask2enc(mask)

    # imwrite is the current name of tifffile's writer; imsave is its legacy alias.
    tifffile.imwrite(out_dir + f"{index}.tiff", img)

In [None]:
# Turn the {id: encoding} dict into a two-column frame: one row per image id.
encodings_by_id = pd.DataFrame.from_dict(masks).T
df_masks = encodings_by_id.reset_index().rename(
    columns={"index": "id", 0: "encoding"}
)

In [None]:
from params import DATA_PATH

# Persist the downscaled encodings next to the data, without the row index.
train_csv_out = f"{DATA_PATH}train_{FACTOR}.csv"
df_masks.to_csv(train_csv_out, index=False)

### Test

In [11]:
# Destination folder for the downscaled test images.
# NOTE: this rebinds `out_dir`, which previously pointed at the train folder.
out_dir = DATA_PATH + f"test_{FACTOR}/"

# exist_ok=True makes the cell re-runnable without a separate existence check;
# makedirs also creates missing parent directories.
os.makedirs(out_dir, exist_ok=True)

In [12]:
# The sample submission provides the ids of the test images to process.
submission_csv = DATA_PATH + "sample_submission.csv"
df = pd.read_csv(submission_csv)

In [14]:
# Point image loading at the test TIFFs.
# NOTE(review): this rebinds TIFF_PATH, which the Train section also reads
# (presumably star-imported from params). Re-running the train loop after this
# cell would load from the test folder — consider a dedicated name.
# NOTE(review): the path is hard-coded rather than derived from DATA_PATH — confirm
# they agree.
TIFF_PATH = '../input/test/'

In [15]:
# Downscale every test image listed in the sample submission. Only images are
# processed here, so the train encodings dict (`masks`) is deliberately left alone.
for index in tqdm(df['id']):
    # Load the source test image.
    img = load_image(os.path.join(TIFF_PATH, index + ".tiff"))

    # cv2.resize expects (width, height); INTER_AREA is area-based resampling,
    # the usual choice when shrinking images.
    img = cv2.resize(
        img,
        (img.shape[1] // FACTOR, img.shape[0] // FACTOR),
        interpolation=cv2.INTER_AREA,
    )

    # imwrite is the current name of tifffile's writer; imsave is its legacy alias.
    tifffile.imwrite(out_dir + f"{index}.tiff", img)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))




### LAB Stats

In [None]:
from data.transforms import get_lab_stats

In [None]:
# Reload the downscaled train encodings from the same DATA_PATH-based location
# the Train section writes them to, instead of a separately hard-coded folder.
df_masks = pd.read_csv(f"{DATA_PATH}train_{FACTOR}.csv")

In [None]:
# LAB statistics keyed by train image id.
stats_dic = {}

# Resolve the downscaled *train* folder explicitly: by this point `out_dir` has
# been rebound to the test folder, while df_masks lists train ids.
train_dir = DATA_PATH + f"train_{FACTOR}/"

for index in tqdm(df_masks["id"]):
    # Load the already-downscaled image written by the Train section.
    img = load_image(os.path.join(train_dir, index + ".tiff"), full_size=False)

    stats_dic[index] = get_lab_stats(img)

    # Drop the image before loading the next one to limit peak memory use.
    del img
    gc.collect()