In [1]:
import sys
sys.path.append('../../30_data_tools/')
sys.path.append('../process_masks/')

In [2]:
from pathlib import Path
import random

In [3]:
from PIL import Image
import numpy as np
from tqdm import tqdm
from sklearn.model_selection import train_test_split

In [4]:
from load_dataset import get_available_moires, get_train_data, get_moire_path, get_non_moire_path

In [5]:
from helper import load_dotenv
from get_labelstudio_data import get_moires_of_project
from mask_functions import load_masks, get_whole_mask

In [6]:
import re

In [7]:
# Settings mapping returned by helper.load_dotenv(); later cells index it by
# key (e.g. dotenv['DATA_DIR'], dotenv['TRAIN_DATA_DPI']).
dotenv = load_dotenv()

In [8]:
# Tile size used by cut_img_to_tiles(): index 0 is compared against the image
# width (shape[1]), index 1 against the image height (shape[0]).
IMG_SIZE = (280, 280)
# A masked tile is kept only if the mean of the mask under it exceeds this value.
MIN_MASK_PARTIAL = 0.5

In [9]:
# Root directory below which write_tiles() creates one sub-directory per set.
DATASET_DIR = Path('./dataset/')

In [10]:
def get_masks_path( row ):
    """Return the path of the pickled masks file that belongs to ``row``.

    The path is ``DATA_DIR / job / variant_name / "<pdf_filename>.masks.pkl"``,
    with ``DATA_DIR`` read from the module-level ``dotenv`` mapping.
    """
    variant_dir = dotenv['DATA_DIR'] / row.job / row.variant_name
    masks_filename = f"{row.pdf_filename}.masks.pkl"
    return variant_dir / masks_filename

In [11]:
def get_img_pair( row, dotenv ):
    """Load the clean and the moire rendering of ``row`` and crop both to its bbox.

    Both images are brought to ``dotenv['TRAIN_DATA_DPI']`` (the source DPI is
    parsed from ``row.variant_name`` and from the moire file name); the moire
    image is then resized to exactly the size of the clean image. The
    ``row.bbox`` string (``"x;y;width;height"``) is scaled by the ratio between
    the final and the original moire image size before cropping.

    Args:
        row: record providing ``non_moire_path``, ``moire_path``,
            ``variant_name`` and ``bbox``.
        dotenv: mapping providing ``'TRAIN_DATA_DPI'``.

    Returns:
        ``(cropped_non_moire, cropped_moire)``: two 2-D float arrays computed
        as ``1 - channel / 255`` from the channel at index 3 of each crop.
    """
    non_moire_img_dpi = int(re.search(r'(\d+)dpi', row.variant_name).groups()[0])
    moire_name_without_suffix = row.moire_path.name.replace( row.moire_path.suffix, '' )
    moire_img_dpi = int(re.match(r'.+\.4c_(\d+)$', moire_name_without_suffix).groups()[0])
    target_dpi = dotenv['TRAIN_DATA_DPI']

    def scaled_to_target( img, source_dpi ):
        # Resample only when the source resolution differs from the target one.
        if target_dpi == source_dpi:
            return img

        factor = target_dpi / source_dpi
        return img.resize((
            round(img.size[0] * factor),
            round(img.size[1] * factor)
        ))

    non_moire_img = scaled_to_target( Image.open( row.non_moire_path ), non_moire_img_dpi )

    moire_img = Image.open( row.moire_path )
    original_size = moire_img.size
    moire_img = scaled_to_target( moire_img, moire_img_dpi )
    # Force identical pixel dimensions so one crop box fits both images.
    moire_img = moire_img.resize(non_moire_img.size)

    # The bbox is stored relative to the original moire image size.
    scale_x = moire_img.size[0] / original_size[0]
    scale_y = moire_img.size[1] / original_size[1]

    raw_bbox = [int(val) for val in row.bbox.split(';')]
    left = int(round(raw_bbox[0] * scale_x))
    top = int(round(raw_bbox[1] * scale_y))
    width = int(round(raw_bbox[2] * scale_x))
    height = int(round(raw_bbox[3] * scale_y))
    crop_box = (left, top, left + width, top + height)

    def inverted_channel_3( img ):
        # Crop, take the channel at index 3 and map 0..255 to 1..0.
        return 1 - np.array(img.crop(crop_box))[:,:,3] / 255

    return inverted_channel_3( non_moire_img ), inverted_channel_3( moire_img )

In [12]:
def cut_img_to_tiles( img, mask ):
    """Cut a 2-D image into half-overlapping tiles of size ``IMG_SIZE``.

    An image narrower or shorter than one tile is first zero-padded (on the
    right / at the bottom) to tile size; the padded array is float32. Tile
    origins advance by half a tile per axis, and an origin whose tile would
    cross the image border is pulled back so the tile ends at the border.

    Args:
        img: 2-D array indexed ``[row, column]``.
        mask: falsy to keep every tile; otherwise a mapping whose ``'mask'``
            entry is sliced with the same coordinates as ``img``. A tile is
            then kept only when the mean of the mask under it is greater than
            ``MIN_MASK_PARTIAL``.

    Returns:
        List of tiles (slices of the possibly padded image), iterated column
        position first, row position second.
    """
    tile_w = IMG_SIZE[0]
    tile_h = IMG_SIZE[1]
    stride_x = int(round(tile_w / 2))
    stride_y = int(round(tile_h / 2))

    # in case the image is too small in at least one dimension
    if img.shape[1] < tile_w:
        padded = np.zeros((img.shape[0], tile_w), dtype='float32')
        padded[:, :img.shape[1]] = img
        img = padded

    if img.shape[0] < tile_h:
        padded = np.zeros((tile_h, img.shape[1]), dtype='float32')
        padded[:img.shape[0], :] = img
        img = padded

    height, width = img.shape[0], img.shape[1]

    # Clamp every origin so that origin + tile size never exceeds the image.
    lefts = [min(x, width - tile_w) for x in range(0, width - stride_x, stride_x)]
    tops = [min(y, height - tile_h) for y in range(0, height - stride_y, stride_y)]

    tiles = []
    for left in lefts:
        cols = slice(left, left + tile_w)

        for top in tops:
            rows = slice(top, top + tile_h)

            # With a mask, skip tiles that are not sufficiently covered by it.
            if mask and not (mask['mask'][rows, cols].mean() > MIN_MASK_PARTIAL):
                continue

            tiles.append(img[rows, cols])

    return tiles

In [25]:
def process_sample( row, dotenv ):
    """Build aligned ``(non_moire_tile, moire_tile)`` pairs for one dataset row.

    The mask whose ``'bbox'`` equals the parsed ``row.bbox`` is looked up in the
    row's masks file; the clean and the moire crop are then tiled with that
    same mask so that tile ``i`` of both lists covers the same region.

    Args:
        row: record providing ``bbox`` (``"x;y;width;height"``) plus the fields
            read by ``get_masks_path`` and ``get_img_pair``.
        dotenv: settings mapping forwarded to ``get_img_pair``.

    Returns:
        List of ``(non_moire_tile, moire_tile)`` tuples.

    Raises:
        IndexError: if the masks file contains no mask with the row's bbox.
        ValueError: if the two images yield a different number of tiles.
    """
    bbox = [int(val) for val in row.bbox.split(';')]
    masks_path = get_masks_path( row )

    # Take the first mask with a matching bbox; fail with a message that names
    # the bbox and file instead of a bare "list index out of range".
    mask = next(
        (m for m in load_masks( masks_path ) if m['bbox'] == bbox),
        None
    )
    if mask is None:
        raise IndexError(f"no mask with bbox {bbox} found in {masks_path}")

    non_moire_img, moire_img = get_img_pair( row, dotenv )

    moire_samples = cut_img_to_tiles( moire_img, mask=mask )
    non_moire_samples = cut_img_to_tiles( non_moire_img, mask=mask )

    # Pairing is purely positional, so both tilings must have the same length.
    if len(non_moire_samples) != len(moire_samples):
        raise ValueError(
            f"tile count mismatch: {len(non_moire_samples)} non-moire vs "
            f"{len(moire_samples)} moire tiles"
        )

    return list(zip(non_moire_samples, moire_samples))

In [14]:
def test_train_val_split( data, labels, test_size=.2, val_size=.1 ):
    idx_list = [i for i in range(len(data))]
    random.shuffle(idx_list)
    test_limiter = round(len(idx_list) * test_size)
    val_limiter = test_limiter + round(len(idx_list) * val_size)

    train_data = [data[idx] for idx in idx_list[val_limiter:]]
    train_labels = [labels[idx] for idx in idx_list[val_limiter:]]

    test_data = [data[idx] for idx in idx_list[:test_limiter]]
    test_labels = [labels[idx] for idx in idx_list[:test_limiter]]

    val_data = [data[idx] for idx in idx_list[test_limiter:val_limiter]]
    val_labels = [labels[idx] for idx in idx_list[test_limiter:val_limiter]]

    return train_data, train_labels, test_data, test_labels, val_data, val_labels

In [15]:
def write_tiles( data, set_name ):
    """Write tile entries as JPEG files below ``DATASET_DIR / set_name``.

    Args:
        data: sequence of entries. Each entry is a sequence of 2-D arrays, e.g.
            the ``(non_moire_tile, moire_tile)`` tuples built by
            ``process_sample``. Element 0 is stored under ``no_moire``, every
            further element under ``moire``. Pixel values are multiplied by 255
            and cast to uint8, so they are expected to lie in 0..1 (TODO
            confirm against the producers).
        set_name: name of the sub-directory to create, e.g. ``'train'``.

    Files are named after the entry index, zero-padded to four digits, so both
    members of a pair share the same file name in their class directory.
    """
    set_dir = DATASET_DIR / set_name
    # parents=True: DATASET_DIR itself may not exist on a fresh checkout.
    set_dir.mkdir(parents=True, exist_ok=True)

    for i, entry in enumerate(tqdm(data)):
        for j, tile in enumerate(entry):
            parent_dir_name = 'no_moire' if j == 0 else 'moire'
            parent_dir = set_dir / parent_dir_name
            parent_dir.mkdir(exist_ok=True)

            out_path = parent_dir / f"{str(i).zfill(4)}.jpg"

            # Save the j-th tile of the entry. The previous code passed the
            # whole entry (`data[i]`), which for a tuple repeats the tuple
            # 255 times instead of scaling pixel values.
            img = Image.fromarray( np.uint8(tile * 255) ).convert('RGB')
            img.save( out_path, progressive=True )

In [26]:
def generate_tiles( sample, set_name ):
    """Tile every row of ``sample`` and write the result as set ``set_name``.

    Each row is passed to ``process_sample`` together with the module-level
    ``dotenv``; the returned lists are concatenated and handed to
    ``write_tiles`` in one call.
    """
    tile_pairs = []

    for row_idx in tqdm(range(sample.shape[0])):
        tile_pairs.extend( process_sample( sample.iloc[row_idx], dotenv ) )

    write_tiles( tile_pairs, set_name )

In [17]:
from get_labelstudio_data import get_results_of_project
import pandas as pd
import sqlite3

In [18]:
def get_available_moires():
    """Return the mask rows that were confirmed as ``checked_moire``.

    Results of project 2 whose ``rectanglelabels`` contain ``'checked_moire'``
    are turned into a frame of ``img_name``, ``bbox`` (``"x;y;width;height"``)
    and ``label`` (the first rectangle label). The ``mask`` table of the
    database at ``dotenv['DB_PATH']`` is left-joined with that frame on
    ``img_name`` and ``bbox``; only rows labelled ``'checked_moire'`` are kept.

    NOTE: this definition replaces the ``get_available_moires`` imported from
    ``load_dataset`` earlier in the notebook.

    Returns:
        pandas.DataFrame with the columns of the ``mask`` table plus
        ``img_name`` and ``label``.
    """
    # load results
    dotenv = load_dotenv()
    moire_results = [r for r in get_results_of_project(2) if 'checked_moire' in r['rectanglelabels']]

    results_frame = pd.DataFrame(
        [
            (
                r['img_name'],
                ";".join([
                    str(r['value']['x']),
                    str(r['value']['y']),
                    str(r['value']['width']),
                    str(r['value']['height'])
                ]),
                r['rectanglelabels'][0]
            )
            for r in moire_results
        ],
        columns=['img_name','bbox','label']
    )

    # load masks; close the connection explicitly, because the handle would
    # otherwise stay open for the lifetime of the kernel
    con = sqlite3.connect( dotenv['DB_PATH'] )
    try:
        masks = pd.read_sql(
            'SELECT * FROM mask',
            con
        )
    finally:
        con.close()

    # rebuild the image name under which the mask's rendering is stored
    masks.loc[
        :,
        'img_name'
    ] = masks.apply(lambda val: f"{ val.job }.{ val.pdf_filename }.{ val.method }.{ val.idx }.4c_600.jpg", axis=1)

    # merge frames
    merged = pd.merge(
        masks,
        results_frame,
        how="left",
        on=['img_name','bbox']
    )

    merged = merged.loc[
        merged.label == 'checked_moire'
    ]

    return merged

In [19]:
# NOTE(review): this expression always raises ZeroDivisionError, so "Run All"
# stops at this cell. Presumably a manual guard in front of the expensive
# cells below — confirm, and prefer an explicit flag over a failing cell.
0 / 0

ZeroDivisionError: division by zero

In [20]:
# Confirmed moire rows (uses the get_available_moires defined in this notebook).
df = get_available_moires()
# Path of the clean rendering: always taken from the 'ps2400dpi150lpi' variant
# directory of the row's job.
df.loc[
    :,
    'non_moire_path'
] = df.apply(lambda row: (dotenv['DATA_DIR'] / row.job / 'ps2400dpi150lpi' / f"{ row.pdf_filename }.4c.4c_600.jpg"), axis=1)

# Path of the generated moire rendering, addressed by the row's img_name.
df.loc[
    :,
    'moire_path'
] = df.apply(lambda row: (dotenv['GENERIC_GENERATED_DATA_DIR'] / row.img_name), axis=1)

# Keep only rows for which both image files exist on disk.
df = df.loc[
    df.apply(lambda row: row.moire_path.exists() and row.non_moire_path.exists(), axis=1)
]

In [21]:
# Rows and columns left after dropping entries with missing image files.
df.shape

(304, 17)

In [22]:
# Fractions of the shuffled rows assigned to the test and validation sets;
# the remaining rows form the training set.
TEST_SIZE = 0.2
VAL_SIZE = 0.1

In [23]:
# Fixed seed so that re-running the notebook yields the same train/test/val
# assignment; the shuffle was unseeded before.
SPLIT_SEED = 42

# Shuffle all rows once, then cut the shuffled frame into three consecutive
# slices: test first, validation second, the rest is training data.
sample = df.sample(frac=1, random_state=SPLIT_SEED)
test_limiter = round(sample.shape[0] * TEST_SIZE)
val_limiter = test_limiter + round(sample.shape[0] * VAL_SIZE)

test_rows = sample.iloc[:test_limiter]
val_rows = sample.iloc[test_limiter:val_limiter]
train_rows = sample.iloc[val_limiter:]

In [24]:
# Tile each split and write it to its own sub-directory of DATASET_DIR.
# Each call shows two progress bars: one over the rows, one over the tiles written.
generate_tiles( train_rows, 'train' )
generate_tiles( test_rows, 'test' )
generate_tiles( val_rows, 'val' )

100%|█████████████████████████████████████████| 213/213 [08:06<00:00,  2.29s/it]
100%|██████████████████████████████████████| 3538/3538 [00:12<00:00, 287.02it/s]
100%|███████████████████████████████████████████| 61/61 [02:27<00:00,  2.41s/it]
100%|██████████████████████████████████████| 1010/1010 [00:03<00:00, 319.92it/s]
100%|███████████████████████████████████████████| 30/30 [01:15<00:00,  2.53s/it]
100%|████████████████████████████████████████| 646/646 [00:01<00:00, 327.05it/s]


# Write the real validation set

In [None]:
def get_moire_mask_path( img_name ):
    """Derive the masks file path for an image name.

    The image name is split by a regex: the leading dot-separated part is the
    job name, the part after it and the last part before ``.jpg`` are dropped,
    and what lies in between is the file name. The result is
    ``DATA_DIR / <job> / "halftone600dpi" / "<file name>.masks.pkl"``.

    Raises:
        AttributeError: if ``img_name`` does not match the expected pattern.
    """
    name_match = re.match(r'(.+?)\..+?\.(.+)\..+?\.jpg', img_name)
    jobname, filename = name_match.groups()

    masks_filename = f"{ filename }.masks.pkl"
    return dotenv['DATA_DIR'] / jobname / "halftone600dpi" / masks_filename

In [None]:
def calc_bbox_iou( bbox_a, bbox_b ):
    """Return the intersection over union of two axis-aligned boxes.

    Args:
        bbox_a: box as ``[x, y, width, height]``.
        bbox_b: box in the same format.

    Returns:
        Intersection area divided by union area, in ``0..1``. Boxes that do not
        overlap (or only touch) and boxes with an empty union yield ``0.0``.
    """
    a_left, a_top = bbox_a[0], bbox_a[1]
    a_right, a_bottom = bbox_a[0] + bbox_a[2], bbox_a[1] + bbox_a[3]
    b_left, b_top = bbox_b[0], bbox_b[1]
    b_right, b_bottom = bbox_b[0] + bbox_b[2], bbox_b[1] + bbox_b[3]

    overlap_width = min(a_right, b_right) - max(a_left, b_left)
    overlap_height = min(a_bottom, b_bottom) - max(a_top, b_top)

    # Check each axis on its own: multiplying two negative extents (boxes
    # disjoint in x AND y) would otherwise look like a positive overlap.
    if overlap_width <= 0 or overlap_height <= 0:
        return 0.0

    intersection = overlap_width * overlap_height

    # True union = sum of both areas minus the part counted twice; the area of
    # the enclosing box would overestimate it and deflate the score.
    union = bbox_a[2] * bbox_a[3] + bbox_b[2] * bbox_b[3] - intersection
    if union <= 0:
        return 0.0

    return intersection / union

In [None]:
# A stored mask is only used for a labelled box if their IoU exceeds this value.
MIN_IOU = 0.9

In [None]:
def process_real_moire( moire ):
    """Cut the labelled region of a real moire image into tiles.

    If a masks file exists for the image, the stored mask whose halved bbox
    has the highest IoU with the labelled box is used for tile filtering,
    provided that IoU is greater than ``MIN_IOU``; otherwise all tiles are kept.

    Args:
        moire: mapping with ``'img_name'`` and ``'value'``; the latter holds
            the labelled box as ``x``, ``y``, ``width`` and ``height``.

    Returns:
        List of tiles as returned by ``cut_img_to_tiles``.
    """
    mask_path = get_moire_mask_path( moire['img_name'] )
    img_path = dotenv['LABEL_STUDIO_DIR'] / moire['img_name']
    value = moire['value']

    best_mask = None
    if mask_path.exists():
        label_bbox = [value['x'], value['y'], value['width'], value['height']]
        best_iou = -1

        for candidate in load_masks(mask_path):
            # Mask boxes are divided by 2 before the comparison
            # (presumably stored at twice the label resolution — TODO confirm).
            halved_bbox = [val / 2 for val in candidate['bbox']]
            iou = calc_bbox_iou( label_bbox, halved_bbox )

            if iou > MIN_IOU and iou > best_iou:
                best_iou, best_mask = iou, candidate

    crop_box = (
        value['x'],
        value['y'],
        value['x'] + value['width'],
        value['y'] + value['height']
    )
    cropped_img = Image.open(img_path).crop(crop_box)

    # Same conversion as for the generated data: channel 3, 0..255 -> 1..0.
    channel = 1 - np.array(cropped_img)[:,:,3] / 255

    return cut_img_to_tiles( channel, mask=best_mask )

In [None]:
# Collect the tiles of all real moire annotations.
# NOTE(review): `moires` is not assigned anywhere in the visible notebook;
# presumably it should come from get_moires_of_project (imported above) — confirm.
tiles = []
for m in tqdm(moires):
    tiles += process_real_moire(m)

In [None]:
# Number of real-moire tiles collected above.
len(tiles)

In [None]:
# NOTE(review): write_tiles is defined above as write_tiles(data, set_name);
# this call passes three positional arguments and would raise a TypeError.
# Each element of `tiles` is also a single tile, not a sequence of tiles —
# confirm the intended entry format before running this cell.
write_tiles( tiles, [1 for t in tiles], 'real_val' )