**About**: This notebook is used to downscale the images of the train and test sets, in order to speed up training and inference.
  - Use the `FACTOR` parameter to specify the downscaling factor. We recommend generating data with downscaling factors 2 and 4.
  - For training data, we save extra time by also computing the downscaled RLEs. Use the `NAME` parameter to specify which RLE file to downscale.
  - It is only required to save the downscaled images once; use the `SAVE_IMG` parameter for this purpose.

In [None]:
%load_ext autoreload
%autoreload 2

### Imports

In [None]:
import os
import gc
import cv2
import sys
import tifffile
import numpy as np
import pandas as pd

from tqdm.notebook import tqdm
from collections import Counter
from matplotlib import pyplot as plt

sys.path.append("../code/")

In [None]:
from data.dataset import load_image
from utils.rle import *
from params import *

In [None]:
# Downscaling factor: image and mask widths/heights are integer-divided by this value,
# and it is embedded in the names of the output folders and csv files.
FACTOR = 2

### Train

In [None]:
# Output folder for the downscaled training images; the factor is part of its name.
out_dir = DATA_PATH + f"train_{FACTOR}/"

# exist_ok=True makes re-running this cell a no-op and avoids the
# check-then-create race of `os.path.exists` followed by `os.mkdir`.
os.makedirs(out_dir, exist_ok=True)

In [None]:
# Suffix selecting which training rle csv is downscaled:
#   "_onlyfc" -> unhealthy class
#   "_fix"    -> healthy class with fixed issues
#   ""        -> original data
NAME = "_fix"

# When True, the downscaled training images are also written to `out_dir`.
SAVE_IMG = False

# Load the selected rle file and index it by image id.
df_masks = pd.read_csv(DATA_PATH + "train" + NAME + ".csv")
df_masks = df_masks.set_index("id")

In [None]:
# Downscaled rle encodings, keyed by image id.
masks = {}

for index, encs in tqdm(df_masks.iterrows(), total=len(df_masks)):
    # Read the full-resolution image and build its mask from the csv row.
    img = load_image(os.path.join(TIFF_PATH, index + ".tiff"))
    mask = enc2mask(encs, (img.shape[1], img.shape[0]))

    if SAVE_IMG:
        # cv2.resize takes the target size in (width, height) order.
        img = cv2.resize(
            img,
            (img.shape[1] // FACTOR, img.shape[0] // FACTOR),
            interpolation=cv2.INTER_AREA,
        )
        # `imwrite` replaces `imsave`, which is deprecated and no longer
        # available in recent tifffile releases.
        tifffile.imwrite(out_dir + f"{index}.tiff", img)

    # Nearest-neighbour interpolation only picks existing pixel values,
    # so the downscaled mask contains no blended/intermediate labels.
    mask = cv2.resize(
        mask,
        (mask.shape[1] // FACTOR, mask.shape[0] // FACTOR),
        interpolation=cv2.INTER_NEAREST,
    )

    # Re-encode the downscaled mask and store it under the image id.
    rle = mask2enc(mask)
    masks[index] = rle

#     break  # uncomment to stop after the first image while debugging

In [None]:
# Build a frame with one row per image id from the {id: rle} dict:
# ids start out as columns, so transpose before moving them into a column.
df_masks = pd.DataFrame.from_dict(masks).T
df_masks = df_masks.reset_index()
df_masks = df_masks.rename(columns={0: "encoding", "index": "id"})

# Write the downscaled encodings next to the original csv, tagged with the factor.
df_masks.to_csv(f"{DATA_PATH}train_{FACTOR}{NAME}.csv", index=False)

print(f"Saved data to {DATA_PATH}train_{FACTOR}{NAME}.csv")

### Test

In [None]:
# Output folder for the downscaled test images; the factor is part of its name.
out_dir = DATA_PATH + f"test_{FACTOR}/"

# exist_ok=True makes re-running this cell a no-op and avoids the
# check-then-create race of `os.path.exists` followed by `os.mkdir`.
os.makedirs(out_dir, exist_ok=True)

In [None]:
# The sample submission file provides the ids of the test images to downscale.
submission_csv = DATA_PATH + "sample_submission.csv"
df = pd.read_csv(submission_csv)

In [None]:
for index in tqdm(df['id']):
    # Read the full-resolution test image (no mask is generated for test data).
    img = load_image(os.path.join(TIFF_PATH_TEST, index + ".tiff"))

    # cv2.resize takes the target size in (width, height) order.
    img = cv2.resize(
        img,
        (img.shape[1] // FACTOR, img.shape[0] // FACTOR),
        interpolation=cv2.INTER_AREA,
    )

    # `imwrite` replaces `imsave`, which is deprecated and no longer
    # available in recent tifffile releases.
    tifffile.imwrite(out_dir + f"{index}.tiff", img)

In [None]:
for index in tqdm(EXTRA_IMGS):
    # Load the extra image at full resolution from the test tiff folder.
    tiff_file = os.path.join(TIFF_PATH_TEST, index + ".tiff")
    img = load_image(tiff_file)

    # Target size in (width, height) order, as cv2.resize expects.
    src_h, src_w = img.shape[0], img.shape[1]
    img = cv2.resize(
        img,
        (src_w // FACTOR, src_h // FACTOR),
        interpolation=cv2.INTER_AREA,
    )

    # Saving is currently disabled; uncomment to write the downscaled image.
#     tifffile.imwrite(out_dir + f"{index}.tiff", img)