In [None]:
import glob
import os
import random
from typing import Optional, Tuple, List

import cv2
import numpy as np
import pandas as pd
import torch
import torchvision.transforms as TF
from tqdm import tqdm

In [None]:
"""
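Before running the notebook, paste the code from the snippet below into this cell; it is expected to provide the `darken` function that `generate` calls: https://github.com/naoto0804/SynShadow/issues/6#issuecomment-892511226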

@article{inoue2021learning,
  title={{Learning from Synthetic Shadows for Shadow Detection and Removal}},
  author={Inoue, Naoto and Yamasaki, Toshihiko},
  journal={IEEE Transactions on Circuits and Systems for Video Technology},
  year={2021},
  volume={31},
  number={11},
  pages={4187-4197},
  doi={10.1109/TCSVT.2020.3047977}
}
"""

In [None]:
def generate(shadow_free: torch.Tensor, mask: torch.Tensor) -> torch.Tensor:
    """Blend a darkened copy of ``shadow_free`` into it, weighted by ``mask``.

    Both tensors must be 3-dimensional, agree on every dimension after the
    first, and hold values inside ``[0, 1]``; a violation trips an ``assert``.

    Args:
        shadow_free: Image without shadow.
        mask: Per-element shadow weight. ``0`` keeps the ``shadow_free`` value,
            ``1`` takes the darkened value.

    Returns:
        ``mask * darkened + (1 - mask) * shadow_free``.
    """
    inputs = (shadow_free, mask)
    for tensor in inputs:
        assert tensor.ndim == 3
    assert shadow_free.shape[1:] == mask.shape[1:]
    for tensor in inputs:
        assert 0.0 <= tensor.min() and tensor.max() <= 1.0

    # `darken` is not defined in this notebook; it has to be supplied by the
    # snippet referenced in the cell above.
    darkened = darken(shadow_free, x_turb_sigma=0.0)
    # To randomize the shadow color instead:
    # darkened = darken(shadow_free, intercepts_mode="affine_unsync")

    # Accumulate in place into the shadowed part, as the lit part is added on top.
    return (mask * darkened).add_((1 - mask) * shadow_free)

In [None]:
def set_seed(seed: int = 42) -> None:
    """Seed every random number generator used by this notebook.

    Seeds Python's ``random``, NumPy's legacy global generator and PyTorch
    (CPU and all CUDA devices), and switches cuDNN to deterministic mode.

    Args:
        seed: Value used for all generators.
    """
    random.seed(seed)
    # PYTHONHASHSEED is read at interpreter start-up, so this only influences
    # subprocesses launched afterwards, not the running kernel.
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not only the current one. Silently ignored when
    # CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN auto-tuner may pick different algorithms from run to run.
    torch.backends.cudnn.benchmark = False

In [None]:
def get_mask(img: np.ndarray) -> np.ndarray:
    """Turn an image into a soft "ink" mask with a trailing channel axis.

    A 3-dimensional input is first reduced to grayscale with
    ``cv2.COLOR_RGB2GRAY``; any other input is used as is. Pixels equal to 255
    map to 0, every other pixel maps to ``(255 - value) / 255``.

    Args:
        img: Grayscale image, or a 3-dimensional color image.

    Returns:
        Array with one extra axis appended after the first two, divided by 255.
    """
    if img.ndim == 3:
        gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    else:
        gray = img
    darkness = np.where(gray == 255, 0, 255 - gray)
    return darkness[:, :, None] / 255


def syn_picture_text(
    pic: np.ndarray, text: np.ndarray, text_mask: np.ndarray
) -> np.ndarray:
    """Overlay ``text`` on ``pic`` using ``text_mask`` as the blend weight.

    Args:
        pic: Image shown where the mask is 0.
        text: Image shown where the mask is 1.
        text_mask: Blend weight, broadcast against both images.

    Returns:
        ``(1 - text_mask) * pic + text_mask * text`` cast to ``uint8``.
    """
    picture_part = (1 - text_mask) * pic
    text_part = text_mask * text
    return (picture_part + text_part).astype(np.uint8)


def syn_texture(
    img: np.ndarray,
    texture: np.ndarray,
    text_mask: np.ndarray,
    seg_mask: np.ndarray,
    alpha: float = 0.7,
    beta: float = 0.3,
    gamma: float = 0.0,
    color_weight: Optional[Tuple[float, float]] = None,
) -> np.ndarray:
    """Blend a paper texture into a document image.

    The whole image is mixed as ``alpha * img + beta * texture + gamma`` via
    ``cv2.addWeighted``. When ``color_weight`` is given, pixels covered by
    ``text_mask`` or ``seg_mask`` use ``color_weight`` as the
    ``(img, texture)`` weights instead, and the two blends are combined
    according to the mask.

    Args:
        img: Document image.
        texture: Texture image; ``cv2.addWeighted`` requires it to match ``img``.
        text_mask: Foreground weight of the text, broadcastable to ``img``.
        seg_mask: Foreground weight of the figures, broadcastable to ``img``.
        alpha: Weight of ``img`` in the background blend.
        beta: Weight of ``texture`` in the background blend.
        gamma: Scalar added to both blends.
        color_weight: Optional ``(img_weight, texture_weight)`` for foreground
            pixels. ``None`` applies the background blend everywhere.

    Returns:
        The textured image. It is cast to ``uint8`` when ``color_weight`` is set.
    """
    # The return annotation used to be `np.float`, an alias removed in
    # NumPy 1.24, which made this definition fail with AttributeError.
    dst_back = cv2.addWeighted(img, alpha, texture, beta, gamma)
    if color_weight is None:
        return dst_back

    dst_text = cv2.addWeighted(img, color_weight[0], texture, color_weight[1], gamma)
    # Where text and figure overlap the summed mask exceeds 1; without the clip
    # the blend leaves the valid range and wraps around in the uint8 cast.
    foreground = np.clip(text_mask + seg_mask, 0.0, 1.0)
    blended = (1 - foreground) * dst_back + foreground * dst_text
    return blended.astype(np.uint8)


def syn_background(
    img: np.ndarray,
    background: np.ndarray,
    text_mask: np.ndarray,
    seg_mask: np.ndarray,
    ratio: float = 1.1,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Paste a document onto a resized background at a random offset.

    The background is resized to the smallest 3:4 (width:height) canvas that
    contains ``img``, scaled by ``ratio``. The document and its masks are then
    written into that canvas at one shared, randomly drawn position.

    Args:
        img: Document image with three dimensions.
        background: Background image; it is resized, not modified in place.
        text_mask: Text mask assignable to an ``(h, w, 1)`` region.
        seg_mask: Figure mask assignable to an ``(h, w, 1)`` region.
        ratio: Scale applied to the canvas size.

    Returns:
        ``(on_back, on_back_text, on_back_seg, paper_mask)``: the composited
        image, followed by three ``float64`` arrays of shape
        ``(canvas_h, canvas_w, 1)``. ``paper_mask`` is 1 where the document
        was pasted and 0 elsewhere.
    """
    doc_h, doc_w, _ = img.shape

    # Grow whichever side is too short for a 3:4 canvas.
    if doc_h * 3 / doc_w < 4:
        canvas_w, canvas_h = doc_w, doc_w * 4 / 3
    else:
        canvas_w, canvas_h = doc_h * 3 / 4, doc_h
    target_size = (int(canvas_w * ratio), int(canvas_h * ratio))
    background = cv2.resize(background, target_size, interpolation=cv2.INTER_CUBIC)
    back_h, back_w, _ = background.shape

    # Row offset is drawn before the column offset; no draw when there is no slack.
    top = np.random.randint(0, back_h - doc_h + 1) if back_h != doc_h else 0
    left = np.random.randint(0, back_w - doc_w + 1) if back_w != doc_w else 0
    rows = slice(top, top + doc_h)
    cols = slice(left, left + doc_w)

    on_back = background.copy()
    on_back[rows, cols] = img

    def _place(values) -> np.ndarray:
        # Single-channel float64 canvas, zero outside the pasted region.
        layer = np.zeros((back_h, back_w, 1), dtype=np.float64)
        layer[rows, cols] = values
        return layer

    return on_back, _place(text_mask), _place(seg_mask), _place(1.0)


def syn_shadow(img: np.ndarray, shadow: np.ndarray, scale: float = 0.5) -> np.ndarray:
    """Render ``shadow`` onto ``img`` and return the result as a ``uint8`` array.

    Both inputs are converted with torchvision's ``ToTensor``, passed to
    ``generate`` (with the shadow multiplied by ``scale``), and the output is
    moved back to channel-last order and multiplied by 255.

    Args:
        img: Image to put the shadow on.
        shadow: Shadow matte used as the mask for ``generate``.
        scale: Factor applied to the shadow tensor.

    Returns:
        The shadowed image as ``uint8``; fractional values are truncated.
    """
    to_tensor = TF.ToTensor()
    composite = generate(to_tensor(img), to_tensor(shadow) * scale)
    channel_last = composite.detach().cpu().numpy().transpose(1, 2, 0)
    return (channel_last * 255).astype(np.uint8)


def perspective_trans(
    img: np.ndarray,
    shadowed_img: np.ndarray,
    text_mask: np.ndarray,
    seg_mask: np.ndarray,
    paper_mask: np.ndarray,
    scale: float = 1.05,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Apply one random perspective warp to an image pair and its masks.

    Each corner of the ``(w, h)`` frame is moved outward by an independent
    random offset below ``int(w * (scale - 1))`` horizontally and
    ``int(h * (scale - 1))`` vertically. The resulting homography is applied
    to all five inputs with an output size of ``(w, h)``.

    Args:
        img: Shadow-free image with three dimensions; it defines ``h`` and ``w``.
        shadowed_img: Shadowed counterpart of ``img``.
        text_mask: Text mask to warp.
        seg_mask: Figure mask to warp.
        paper_mask: Paper-region mask to warp.
        scale: ``scale - 1`` is the maximum corner displacement as a fraction
            of the image size. When the resulting pixel bound is not positive
            (for example ``scale <= 1`` or a very small image), that axis is
            left undistorted.

    Returns:
        ``(img, shadowed_img, text_mask, seg_mask, paper_mask)`` after warping.
        The two images are converted with ``cv2.COLOR_BGR2BGRA`` before the
        warp, so they come back with an alpha channel.
    """
    jitter_ratio = scale - 1
    h, w, _ = img.shape

    def _corner_offsets(limit: int):
        # np.random.randint(0, 0) raises ValueError, so a zero (or negative)
        # bound means "no displacement". Valid bounds draw exactly as before.
        return [np.random.randint(0, limit) if limit > 0 else 0 for _ in range(4)]

    # Horizontal offsets are drawn before vertical ones to keep seeded runs stable.
    dw = _corner_offsets(int(w * jitter_ratio))
    dh = _corner_offsets(int(h * jitter_ratio))

    p_original = np.float32([[0, 0], [w, 0], [0, h], [w, h]])
    p_trans = np.float32(
        [
            [0 - dw[0], 0 - dh[0]],
            [w + dw[1], 0 - dh[1]],
            [0 - dw[2], h + dh[2]],
            [w + dw[3], h + dh[3]],
        ]
    )
    M = cv2.getPerspectiveTransform(p_original, p_trans)

    dst = cv2.cvtColor(img, cv2.COLOR_BGR2BGRA)
    dst_shadowed = cv2.cvtColor(shadowed_img, cv2.COLOR_BGR2BGRA)

    i_trans = cv2.warpPerspective(dst, M, (w, h))
    i_trans_shadowed = cv2.warpPerspective(dst_shadowed, M, (w, h))
    text_mask = cv2.warpPerspective(text_mask, M, (w, h))
    seg_mask = cv2.warpPerspective(seg_mask, M, (w, h))
    paper_mask = cv2.warpPerspective(paper_mask, M, (w, h))

    return i_trans, i_trans_shadowed, text_mask, seg_mask, paper_mask


def get_average_color(x):
    """Return the mean of the first three columns of ``x``.

    Args:
        x: 2-dimensional array with one pixel per row; the columns are read in
            the order the code names ``b, g, r``.

    Returns:
        Array of three means, in the same column order.
    """
    return np.array([np.mean(x[:, channel]) for channel in range(3)])

In [None]:
# Generate the FSDSRD dataset: for every text page, composite text + figures,
# paper texture, background and shadow, warp the result, and write the image
# pair, the masks and a CSV index.
set_seed()

root_path = "../dataset/FSDSRD"
csv_path = "../csv/FSDSRD/dataset.csv"
dirs = ["img", "gt", "text_mask", "figure_mask", "seg_mask", "shadow", "avg_color"]
for sub_dir in dirs:
    # makedirs also creates missing parents and tolerates re-runs.
    os.makedirs(os.path.join(root_path, sub_dir), exist_ok=True)
os.makedirs(os.path.dirname(csv_path), exist_ok=True)

# glob order depends on the filesystem; sorting makes the seeded random picks
# select the same files on every machine.
shadows = sorted(glob.glob("../data/shadows/*"))
textures = sorted(glob.glob("../data/textures/*"))
backgrounds = sorted(glob.glob("../data/backgrounds/*"))
text_paths = sorted(glob.glob("../data/EN/*/orig_texts/*"))

if text_paths:
    for pool_name, pool in (
        ("shadows", shadows),
        ("textures", textures),
        ("backgrounds", backgrounds),
    ):
        if not pool:
            raise FileNotFoundError(f"No files found under ../data/{pool_name}/")


def _read_image(path: str, flags: int = cv2.IMREAD_COLOR) -> np.ndarray:
    """Read ``path`` with OpenCV, raising instead of returning ``None``."""
    image = cv2.imread(path, flags)
    if image is None:
        raise FileNotFoundError(f"Could not read image: {path}")
    return image


nums = len(text_paths)
h = 1200
w = 900
c = 3

img_paths = []
gt_paths = []
shadow_mask_paths = []
text_mask_paths = []
figure_mask_paths = []

background_colors: List = [[], [], []]

# A real 10x10 structuring element. Passing the tuple (10, 10) as the kernel
# makes OpenCV treat it as a two-element matrix, not as a kernel size.
close_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (10, 10))

k = 0
for path in tqdm(text_paths):
    # Everything stays in OpenCV's BGR order, because textures, backgrounds
    # and cv2.imwrite are all BGR. Converting the figure layer to RGB here
    # used to swap its red and blue channels in the saved images.
    picture = _read_image(path.replace("orig_texts", "orig_backgrounds"))
    text = _read_image(path)
    text = cv2.resize(text, (w, h), interpolation=cv2.INTER_CUBIC)
    picture = cv2.resize(picture, (w, h), interpolation=cv2.INTER_CUBIC)

    for _ in range(1):  # number of synthetic variants per page
        num = str(k).zfill(5)
        texture = _read_image(textures[np.random.randint(0, len(textures))])
        texture = cv2.resize(texture, (w, h), interpolation=cv2.INTER_CUBIC)

        background = _read_image(backgrounds[np.random.randint(0, len(backgrounds))])
        shadow = _read_image(
            shadows[np.random.randint(0, len(shadows))], cv2.IMREAD_GRAYSCALE
        )

        # NOTE(review): `text` is BGR while get_mask converts with
        # COLOR_RGB2GRAY; left as is, since it only matters for colored text.
        text_mask = get_mask(text)
        # get_mask expects RGB, so give it an RGB view of the figure layer.
        seg_mask = get_mask(cv2.cvtColor(picture, cv2.COLOR_BGR2RGB))
        syn_pic = syn_picture_text(picture, text, text_mask)
        color_weight = np.random.uniform(low=0.7, high=1.0)
        ab = np.random.uniform(low=0.3, high=0.8)

        syn_tex = syn_texture(
            syn_pic,
            texture,
            text_mask=text_mask,
            seg_mask=seg_mask,
            alpha=1 - ab,
            beta=ab,
            color_weight=(color_weight, 1 - color_weight),
        )
        ratio = np.random.uniform(low=1.00, high=1.01)
        syn_back, text_mask, seg_mask, paper_mask = syn_background(
            syn_tex, background, text_mask, seg_mask, ratio=ratio
        )  # 1.00-1.15

        # Average paper color: the texture mean blended with 255 using the same
        # weights as the texture blend above (presumably assumes white paper).
        texture_flatten = texture.reshape(h * w, c)
        avg_color = get_average_color(texture_flatten)
        avg_color = 255 * (1 - ab) + avg_color * ab

        for m in range(3):
            background_colors[m].append(avg_color[m])

        shadow = cv2.resize(shadow, (w, h), interpolation=cv2.INTER_CUBIC)
        syn_back = cv2.resize(syn_back, (w, h), interpolation=cv2.INTER_CUBIC)
        # The masks come back at the background canvas size and must be
        # rescaled together with syn_back. Resizing them only after the warp
        # was a no-op (the warp already outputs (w, h)) and left them cropped
        # instead of scaled. Linear interpolation keeps values inside [0, 1].
        text_mask = cv2.resize(text_mask, (w, h), interpolation=cv2.INTER_LINEAR)
        seg_mask = cv2.resize(seg_mask, (w, h), interpolation=cv2.INTER_LINEAR)
        paper_mask = cv2.resize(paper_mask, (w, h), interpolation=cv2.INTER_LINEAR)

        syn_shadowed = syn_shadow(syn_back, shadow, scale=1)
        scale = np.random.uniform(low=1.01, high=1.1)
        (
            trans_img,
            trans_shadowed_img,
            text_mask,
            fig_mask,
            paper_mask,
        ) = perspective_trans(
            syn_back, syn_shadowed, text_mask, seg_mask, paper_mask, scale=scale
        )

        fig_mask = cv2.morphologyEx(
            np.where(fig_mask > 0, 1, 0).astype(np.uint8), cv2.MORPH_CLOSE, close_kernel
        )

        cv2.imwrite(os.path.join(root_path, f"img/{num}.png"), trans_shadowed_img)
        cv2.imwrite(os.path.join(root_path, f"gt/{num}.png"), trans_img)
        cv2.imwrite(
            os.path.join(root_path, f"text_mask/{num}.png"),
            (text_mask * 255).astype(np.uint8),
        )
        cv2.imwrite(
            os.path.join(root_path, f"figure_mask/{num}.png"),
            (fig_mask * 255).astype(np.uint8),
        )
        # cv2.imwrite(os.path.join(root_path, f"paper_mask/{num}.png"), (paper_mask*255).astype(np.uint8))
        cv2.imwrite(
            os.path.join(root_path, f"shadow/{num}.png"), shadow.astype(np.uint8)
        )
        # Text and figure masks can overlap; clip the sum so it cannot exceed 1
        # and wrap around in the uint8 cast.
        cv2.imwrite(
            os.path.join(root_path, f"seg_mask/{num}.png"),
            (np.clip(text_mask + fig_mask, 0.0, 1.0) * 255).astype(np.uint8),
        )
        cv2.imwrite(
            os.path.join(root_path, f"avg_color/{num}.png"),
            np.full_like(texture_flatten, avg_color).reshape(h, w, c),
        )

        img_paths.append(f"dataset/FSDSRD/img/{num}.png")
        gt_paths.append(f"dataset/FSDSRD/gt/{num}.png")
        shadow_mask_paths.append(f"dataset/FSDSRD/shadow/{num}.png")
        text_mask_paths.append(f"dataset/FSDSRD/text_mask/{num}.png")
        # NOTE(review): the "figure_mask" column points at seg_mask/ (text +
        # figure), not figure_mask/. Kept as is; confirm against the consumer.
        figure_mask_paths.append(f"dataset/FSDSRD/seg_mask/{num}.png")

        k += 1

# Build the index in one go; the column order matches the original CSV.
df = pd.DataFrame(
    {
        "img": img_paths,
        "gt": gt_paths,
        "B": background_colors[0],
        "G": background_colors[1],
        "R": background_colors[2],
        "shadow_mask": shadow_mask_paths,
        "text_mask": text_mask_paths,
        "figure_mask": figure_mask_paths,
    }
)

df.to_csv(csv_path)