In [None]:
# Change working directory
%cd drive/MyDrive/DAT/Vingroup/CV/PCA

/content/drive/MyDrive/DAT/Vingroup/CV/PCA


In [None]:
# Set up environment
import numpy as np
import cv2
import os
from google.colab.patches import cv2_imshow

In [None]:
# Constants
# Input folders (relative to the working directory). main() passes them to
# load_data, which pairs each image with the ground truth of the same filename.
WLI_FOLDER = '../raw_data/WLI'
GT_FOLDER = '../raw_data/GT'
# Band widths handed to get_data_points: POS_MARGIN is the number of erosion
# iterations (band inside the mask), NEG_MARGIN the number of dilation
# iterations (band outside the mask). Both use a 3x3 kernel.
POS_MARGIN = 30
NEG_MARGIN = 25
# Root output folder; main() writes positives.npy / negatives.npy into the
# 'rgb', 'hsv' and 'lab' sub-folders below it.
OUT_DIR = 'data'

In [None]:
def load_data(im_dir, gt_dir):
    """
    Load images and their binarized ground truth.

    Files are paired by name: every image in `im_dir` must have a ground truth
    with the same filename in `gt_dir`. Entries are visited in sorted filename
    order so the returned lists are reproducible (os.listdir gives no ordering
    guarantee). Sub-directories of `im_dir` are skipped.

    Params:
        im_dir (string): the folder containing images
        gt_dir (string): the folder containing ground truth
    Return:
        images (list of ndarray): images as returned by cv2.imread
        ground_truths (list of ndarray): uint8 masks holding only 0 and 255
    Raises:
        IOError: if an image or its ground truth cannot be read
    """
    images = []
    ground_truths = []
    for filename in sorted(os.listdir(im_dir)):
        im_path = os.path.join(im_dir, filename)
        # Folders (e.g. notebook checkpoint directories) are not images.
        if os.path.isdir(im_path):
            continue
        # cv2.imread signals failure by returning None instead of raising,
        # so check explicitly and report the offending path.
        image = cv2.imread(im_path)
        if image is None:
            raise IOError('Cannot read image: ' + im_path)
        gt_path = os.path.join(gt_dir, filename)
        ground_truth = cv2.imread(gt_path, cv2.IMREAD_GRAYSCALE)
        if ground_truth is None:
            raise IOError('Cannot read ground truth: ' + gt_path)
        # Binarize: values above 127 become 255, everything else 0.
        ground_truth = (ground_truth > 127).astype(np.uint8) * 255
        images.append(image)
        ground_truths.append(ground_truth)
    return images, ground_truths

In [None]:
def get_data_points(images, ground_truths, pos_margin, neg_margin):
    """
    Get data points to use PCA on.

    For every image, pixels are collected from two bands around the boundary
    of its ground-truth mask:
      * positive band: mask pixels that disappear when the mask is eroded
        `pos_margin` times (inside the mask, next to its boundary);
      * negative band: pixels that appear when the mask is dilated
        `neg_margin` times (outside the mask, next to its boundary).
    Band pixels are selected where the band image equals 255, so the masks
    are expected to be uint8 arrays holding only 0 and 255.

    Params:
        images (list of ndarray): images
        ground_truths (list of ndarray): ground truth, ground_truths[i]
            belongs to images[i]
        pos_margin (int): positive margin (number of erosion iterations)
        neg_margin (int): negative margin (number of dilation iterations)
    Return:
        positives (m x 3 ndarray): positive data points
        negatives (n x 3 ndarray): negative data points
        Both are empty 0 x 3 uint8 arrays when `images` is empty.
    """
    positives = []
    negatives = []
    kernel = np.ones((3, 3), np.uint8)
    for i, img in enumerate(images):
        gt = ground_truths[i]
        # Get positive region. The eroded mask is a subset of the mask, so the
        # uint8 subtraction cannot wrap around.
        erosion = cv2.erode(gt, kernel, iterations=pos_margin)
        pos = gt - erosion
        positives.append(img[pos == 255])
        # Get negative region. The mask is a subset of the dilated mask.
        dilation = cv2.dilate(gt, kernel, iterations=neg_margin)
        neg = dilation - gt
        negatives.append(img[neg == 255])
    if not positives:
        # np.concatenate rejects an empty list; return well-formed empty
        # results instead of crashing on an empty dataset.
        return np.empty((0, 3), np.uint8), np.empty((0, 3), np.uint8)
    positives = np.concatenate(positives)
    negatives = np.concatenate(negatives)
    return positives, negatives

In [None]:
def main():
    """
    Extract positive/negative pixel samples in RGB, HSV and Lab and save them.

    Loads the raw images and ground truth from WLI_FOLDER / GT_FOLDER, then for
    each color space converts the images, collects the data points with
    get_data_points (using POS_MARGIN / NEG_MARGIN) and writes
    'positives.npy' and 'negatives.npy' into OUT_DIR/<color space>.
    """
    # Load raw data
    images, ground_truths = load_data(WLI_FOLDER, GT_FOLDER)
    # Output sub-folder name -> OpenCV conversion code from the loaded BGR images
    color_spaces = (
        ('rgb', cv2.COLOR_BGR2RGB),
        ('hsv', cv2.COLOR_BGR2HSV),
        ('lab', cv2.COLOR_BGR2Lab),
    )
    # Handle one color space at a time so only a single converted copy of the
    # dataset is held in memory alongside the originals.
    for name, conversion in color_spaces:
        # Change color space
        converted = [cv2.cvtColor(img, conversion) for img in images]
        # Get data points
        positives, negatives = get_data_points(converted, ground_truths, POS_MARGIN, NEG_MARGIN)
        # Save data points to files; exist_ok avoids the check-then-create race
        out_dir = os.path.join(OUT_DIR, name)
        os.makedirs(out_dir, exist_ok=True)
        np.save(os.path.join(out_dir, 'positives.npy'), positives)
        np.save(os.path.join(out_dir, 'negatives.npy'), negatives)

In [None]:
# Run the extraction; writes the positives/negatives .npy files under OUT_DIR.
main()