In [None]:
import os
import pickle
import random

import albumentations as A
import cv2
import numpy as np
from tqdm import tqdm
import math

import pandas as pd

In [None]:
# Load the cached ground-truth records (used by generate_augment_gts below).
# SECURITY NOTE: unpickling can execute arbitrary code -- only load cache files
# that were produced by this project.
# The context manager closes the handle once loading finishes; the original
# left it open for the lifetime of the kernel.
with open("../data/cache/train_gt", 'rb') as fid:
    train_pickle = pickle.Unpickler(fid, encoding="latin1")
    train_gt = train_pickle.load()

In [None]:
# Load the cached records that generate_no_gts later samples from.
# SECURITY NOTE: unpickling can execute arbitrary code -- only load cache files
# that were produced by this project.
# The context manager closes the handle once loading finishes; the original
# left it open for the lifetime of the kernel.
with open("../data/cache/train_no_gt", 'rb') as fid:
    train_no_gt_pickle = pickle.Unpickler(fid, encoding="latin1")
    train_no_gt = train_no_gt_pickle.load()

In [None]:
# Output size of the random bbox-safe crops: image_size[0] and image_size[1] are
# passed, in that order, to A.RandomSizedBBoxSafeCrop when building
# bbox_safe_transform.
image_size = (512, 512)

In [None]:
# Root folder for the generated images; one sub-folder per series is created
# inside it by generate_augment_gts.
output_path = "../data/gts"
# exist_ok=True makes the cell safe to re-run and avoids the race between
# checking for the folder and creating it.
os.makedirs(output_path, exist_ok=True)

In [None]:
def _nodule_pipeline(augmentation):
    """Wrap one augmentation in a Compose that carries pascal_voc boxes labelled through 'nodule'."""
    return A.Compose(
        [augmentation],
        bbox_params=A.BboxParams(format='pascal_voc', label_fields=['nodule']),
    )


# Deterministic augmentations (p=1): two flips and three rotations whose angle
# range is collapsed to a single value.
horizontal_transform = _nodule_pipeline(A.HorizontalFlip(p=1))
vertical_transform = _nodule_pipeline(A.VerticalFlip(p=1))
rotate90_transform = _nodule_pipeline(A.Rotate(limit=(90, 90), p=1))
rotate180_transform = _nodule_pipeline(A.Rotate(limit=(180, 180), p=1))
rotate270_transform = _nodule_pipeline(A.Rotate(limit=(-90, -90), p=1))

# Single table so every pipeline stays paired with the suffix used in its output file name.
_named_transforms = [
    ("h", horizontal_transform),
    ("v", vertical_transform),
    ("r90", rotate90_transform),
    ("r180", rotate180_transform),
    ("r270", rotate270_transform),
]

transforms = [pipeline for _suffix, pipeline in _named_transforms]

transform_names = [suffix for suffix, _pipeline in _named_transforms]

In [None]:
# Random augmentation: a rotation with albumentations' default settings followed
# by a bbox-safe crop that is always applied (p=1) and sized from image_size.
_random_rotation = A.Rotate()
_safe_crop = A.RandomSizedBBoxSafeCrop(image_size[0], image_size[1], p=1)
_nodule_box_params = A.BboxParams(format='pascal_voc', label_fields=['nodule'])

bbox_safe_transform = A.Compose(
    [_random_rotation, _safe_crop],
    bbox_params=_nodule_box_params,
)

In [None]:
# Not used by the pipeline itself; kept for ad-hoc visual checks
# (e.g. drawing boxes with cv2.rectangle before writing an image).
color = (0, 0, 255)


def _ceil_bboxes(bboxes):
    """Return the boxes as a new list of lists with every coordinate rounded up.

    A fresh list is built instead of assigning into ``bboxes`` so the result
    does not depend on the container type returned by the transform (a list of
    tuples or a NumPy array, depending on the albumentations release).

    NOTE(review): rounding the min corner up can shrink a box by up to one
    pixel; the rounding rule is kept unchanged so regenerated data matches
    earlier runs.
    """
    return [[math.ceil(coord) for coord in bbox] for bbox in bboxes]


def _save_sample(image, bboxes, image_path):
    """Write ``image`` to ``image_path`` and append its annotation to ``augment_gts``."""
    cv2.imwrite(image_path, image)
    augment_gts.append({
        'bboxes': bboxes,
        'ignoreareas': np.array([]),
        # Drop the first three characters, i.e. the "../" that output_path
        # starts with (assumes output_path keeps that prefix).
        'filepath': image_path[3:],
    })


def generate_augment_gts(num_safe_crops=45):
    """Write augmented copies of every ground-truth image and record their boxes.

    For each record in the module-level ``train_gt`` this writes, under
    ``<output_path>/<series_id>/``:

    * ``<z>-ori.jpg``          -- the unmodified image with its original boxes,
    * ``<z>-<name>.jpg``       -- one image per entry of ``transforms`` /
      ``transform_names``,
    * ``<z>-cropped-<k>.jpg``  -- ``num_safe_crops`` outputs of
      ``bbox_safe_transform`` (k starts at 1).

    ``series_id`` is the parent folder name of the source file and ``z`` is the
    file name component just before the extension. One annotation dict
    (``bboxes``, ``ignoreareas``, ``filepath``) per written image is appended to
    the module-level list ``augment_gts``.

    Args:
        num_safe_crops: number of random bbox-safe crops per source image.
            Defaults to 45, the value previously hard-coded.

    Raises:
        OSError: if OpenCV cannot read a source image. ``cv2.imread`` returns
            ``None`` instead of raising, which previously surfaced later as an
            unrelated-looking OpenCV error.
    """
    for gt in tqdm(train_gt):
        # Stored paths have "../" prepended before they are read from here.
        filepath = "../" + gt["filepath"]

        path_parts = filepath.split("/")
        series_id = path_parts[-2]
        z = path_parts[-1].split(".")[-2]

        bboxes = gt["bboxes"]

        image = cv2.imread(filepath)
        if image is None:
            raise OSError("cv2.imread could not read image: {}".format(filepath))

        class_labels = ['nodule'] * len(bboxes)

        output_folder = "{}/{}".format(output_path, series_id)
        os.makedirs(output_folder, exist_ok=True)

        # The original image is stored with its boxes exactly as loaded.
        _save_sample(image, bboxes, "{}/{}-ori.jpg".format(output_folder, z))

        # Deterministic flips / rotations.
        for transform, transform_name in zip(transforms, transform_names):
            transformed = transform(image=image, bboxes=bboxes, nodule=class_labels)
            _save_sample(
                transformed['image'],
                _ceil_bboxes(transformed['bboxes']),
                "{}/{}-{}.jpg".format(output_folder, z, transform_name),
            )

        # Random rotation + bbox-safe crops.
        for crop_number in range(1, num_safe_crops + 1):
            transformed = bbox_safe_transform(image=image, bboxes=bboxes, nodule=class_labels)
            _save_sample(
                transformed['image'],
                _ceil_bboxes(transformed['bboxes']),
                "{}/{}-cropped-{}.jpg".format(output_folder, z, crop_number),
            )

In [None]:
def generate_no_gts(no_gt_ratio=2):
    """Sample entries from ``train_no_gt`` in proportion to ``augment_gts`` and save them.

    The requested sample size is ``ceil(len(augment_gts) * no_gt_ratio)``. It is
    capped at ``len(train_no_gt)`` because ``random.sample`` raises
    ``ValueError`` when asked for more items than the population holds.

    The sample is written twice:

    * ``output/no_gt_sample.csv`` -- as a table indexed by ``filepath``,
    * ``../data/cache/train_no_gt_sample`` -- pickled with protocol 2.

    Missing parent folders of both files are created. The number of sampled
    entries is printed.

    Args:
        no_gt_ratio: multiplier applied to ``len(augment_gts)``. Defaults to 2,
            the value previously hard-coded.
    """
    requested_size = math.ceil(len(augment_gts) * no_gt_ratio)
    no_gt_size = min(requested_size, len(train_no_gt))
    image_data_no_gt_sampled = random.sample(train_no_gt, no_gt_size)

    no_gt_csv_path = "output/no_gt_sample.csv"
    no_gt_cache_sampled_path = "../data/cache/train_no_gt_sample"
    for target_path in (no_gt_csv_path, no_gt_cache_sampled_path):
        os.makedirs(os.path.dirname(target_path), exist_ok=True)

    image_data_no_gt_sampled_df = pd.DataFrame(image_data_no_gt_sampled).set_index("filepath")
    image_data_no_gt_sampled_df.to_csv(no_gt_csv_path)

    # The with-block closes the file; no explicit close() is needed.
    with open(no_gt_cache_sampled_path, 'wb') as fid:
        pickle.dump(image_data_no_gt_sampled, fid, 2)

    print(len(image_data_no_gt_sampled))

# Run

In [None]:
# Start from an empty list: generate_augment_gts() appends its annotation dicts
# to this module-level name, so resetting it here keeps a re-run of this cell
# from stacking new records on top of the previous run's.
augment_gts = []

generate_augment_gts()

In [None]:
# Tabular copy of the annotations, indexed by image path, for manual inspection.
df = pd.DataFrame(augment_gts).set_index("filepath")
# to_csv does not create missing folders, so make sure "output/" exists first.
os.makedirs("output", exist_ok=True)
df.to_csv("output/gt-augment.csv")
df.head()

In [None]:
# Persist the augmented annotations with pickle protocol 2, the same protocol
# used for the no-gt sample cache.
augment_gt_cache_path = "../data/cache/train_gt_augment"
with open(augment_gt_cache_path, 'wb') as fid:
    # The with-block closes the file on exit; the explicit close() was redundant.
    pickle.dump(augment_gts, fid, 2)

In [None]:
# Sample entries from train_no_gt. The sample size is derived from
# len(augment_gts), so run this only after the augmentation cell has filled it.
generate_no_gts()