In [None]:
import sys
sys.path.append('../../30_data_tools/')
sys.path.append('../process_masks/')

In [None]:
from pathlib import Path
import random

In [3]:
from PIL import Image
import numpy as np
from tqdm import tqdm
from sklearn.model_selection import train_test_split

In [4]:
from load_dataset import get_available_moires, get_train_data, get_moire_path, get_non_moire_path

In [5]:
from helper import load_dotenv
from mask_functions import load_masks, get_whole_mask

In [6]:
# Result of the project helper `load_dotenv()` (helper.py, not shown here).
# It is passed on to get_moire_path / get_non_moire_path when resolving image paths.
dotenv = load_dotenv()

In [7]:
# Size of one tile cut out by the sliding window in process_sample:
# index 0 is used as the horizontal extent, index 1 as the vertical extent.
IMG_SIZE = (280, 280)

# A tile is only kept when the mean of the mask inside its window exceeds this value.
MIN_MASK_PARTIAL = 0.5

In [8]:
def get_masks_path( row ):
    """Return the path of the masks pickle that sits next to ``row.img_path``.

    The directory of the image is kept; the file name is the image's file name
    with every occurrence of ``'.4c.jpg'`` replaced by ``'.masks.pkl'``.
    """
    img_path = row.img_path
    masks_name = img_path.name.replace('.4c.jpg', '.masks.pkl')

    return img_path.parent / masks_name

In [9]:
def get_img_pair( row, dotenv ):
    """Load the non-moiré / moiré image pair of ``row`` and crop both to ``row.bbox``.

    The moiré image is resized to the size of the non-moiré image. ``row.bbox``
    is read as ``(left, top, width, height)`` and scaled by the
    non-moiré / moiré size ratio before cropping, so both crops cover the same
    region and have the same shape.

    Parameters
    ----------
    row : object with a ``bbox`` attribute (four numbers); it is also handed to
        ``get_non_moire_path`` / ``get_moire_path`` to resolve the file paths.
    dotenv : passed through unchanged to the two path helpers.

    Returns
    -------
    (cropped_non_moire_img, cropped_moire_img) : tuple of 2-D numpy arrays
        ``1 - channel / 255`` of the channel at index 3 of each crop.
        NOTE(review): index 3 presumably is the K channel of a CMYK image
        (the files are named ``.4c.jpg``) - confirm. Images with fewer than
        four channels raise an IndexError here.
    """
    non_moire_path = get_non_moire_path( row, dotenv )
    moire_path = get_moire_path( row, dotenv )

    # Context managers make sure both file handles are released again, even if
    # resizing or cropping raises.
    with Image.open( non_moire_path ) as non_moire_img, Image.open( moire_path ) as moire_img:
        rescale_factor = (
            non_moire_img.size[0] / moire_img.size[0],
            non_moire_img.size[1] / moire_img.size[1]
        )
        resized_moire_img = moire_img.resize( non_moire_img.size )

        left = int(round(row.bbox[0] * rescale_factor[0]))
        top = int(round(row.bbox[1] * rescale_factor[1]))
        width = int(round(row.bbox[2] * rescale_factor[0]))
        height = int(round(row.bbox[3] * rescale_factor[1]))

        # PIL expects (left, upper, right, lower); the same box is used for both images.
        crop_box = ( left, top, left + width, top + height )

        # Convert to arrays while the files are still open so that all pixel
        # data is read before the handles are closed.
        cropped_non_moire_img = 1 - np.array( non_moire_img.crop( crop_box ) )[:,:,3] / 255
        cropped_moire_img = 1 - np.array( resized_moire_img.crop( crop_box ) )[:,:,3] / 255

    return cropped_non_moire_img, cropped_moire_img

In [10]:
def _pad_to_min_shape( img, min_height, min_width ):
    """Zero-pad a 2-D array at the bottom/right so it is at least ``min_height`` x ``min_width``."""
    pad_bottom = max( 0, min_height - img.shape[0] )
    pad_right = max( 0, min_width - img.shape[1] )

    if pad_bottom == 0 and pad_right == 0:
        return img

    # np.pad keeps the dtype of the input, so padded and unpadded images yield
    # tiles of the same dtype.
    return np.pad( img, ((0, pad_bottom), (0, pad_right)), mode='constant' )


def process_sample( row, dotenv ):
    """Cut one sample into labelled tiles of size ``IMG_SIZE``.

    The mask whose ``'bbox'`` equals ``row.bbox`` is looked up in the masks file
    of the row, the cropped image pair is loaded via ``get_img_pair`` and a
    window of ``IMG_SIZE`` is moved over the images with a stride of half the
    window size. Windows that would stick out at the right/bottom edge are
    shifted back so they end exactly at the edge. For every window whose mask
    mean is greater than ``MIN_MASK_PARTIAL`` two entries are emitted.

    Parameters
    ----------
    row : sample description; needs ``bbox`` and whatever ``get_masks_path``
        and ``get_img_pair`` read from it.
    dotenv : passed through to ``get_img_pair``.

    Returns
    -------
    list of ``(label, tile)`` tuples: ``(1, moire_tile)`` followed by
    ``(0, non_moire_tile)`` for every accepted window position.

    Raises
    ------
    IndexError
        If the masks file contains no mask with a matching bbox.
    """
    masks_path = get_masks_path( row )
    matching_masks = [m for m in load_masks( masks_path ) if m['bbox'] == row.bbox]

    if not matching_masks:
        # Same exception type as the former bare ``[0]`` lookup, but with a usable message.
        raise IndexError( f"no mask with bbox {row.bbox!r} found in {masks_path}" )

    mask = matching_masks[0]

    non_moire_img, moire_img = get_img_pair( row, dotenv )

    # slide a window over the image
    conv_size = IMG_SIZE
    step_horizontal = int(round(conv_size[0] / 2))
    step_vertical = int(round(conv_size[1] / 2))

    # in case the image is too small in at least one dimension
    moire_img = _pad_to_min_shape( moire_img, conv_size[1], conv_size[0] )
    non_moire_img = _pad_to_min_shape( non_moire_img, conv_size[1], conv_size[0] )

    img_height, img_width = moire_img.shape[0], moire_img.shape[1]
    max_left = img_width - conv_size[0]
    max_top = img_height - conv_size[1]

    out = []

    for left in range(0, img_width - step_horizontal, step_horizontal):
        # shift the last column of windows back inside the image
        left = min( left, max_left )

        for top in range(0, img_height - step_vertical, step_vertical):
            # shift the last row of windows back inside the image
            top = min( top, max_top )

            rows = slice( top, top + conv_size[1] )
            cols = slice( left, left + conv_size[0] )

            # NOTE(review): the mask is used as loaded and is not padded; numpy
            # truncates slices that run past its edge, so the mean is taken
            # over the part that exists. This assumes mask['mask'] is aligned
            # with the cropped (rescaled) images - TODO confirm.
            mask_partial = mask['mask'][rows, cols].mean()

            if mask_partial > MIN_MASK_PARTIAL:
                out.append(( 1, moire_img[rows, cols] ))
                out.append(( 0, non_moire_img[rows, cols] ))

    return out

In [11]:
def test_train_val_split( data, labels, test_size=.2, val_size=.1 ):
    idx_list = random.shuffle([i for i in range(len(data))])

In [12]:
# Table of available moiré samples, provided by the project's load_dataset module.
# Used below through .sample(), .iloc and .shape (presumably a pandas DataFrame - confirm).
df = get_available_moires()

In [13]:
# NOTE(review): this cell raises ZeroDivisionError on purpose - presumably a
# manual stop so that "Run All" does not continue into the long-running cells
# below. Remove it (or replace it with an explicit guard) before sharing.
0 / 0

ZeroDivisionError: division by zero

In [14]:
# Draw a reproducible subset of the available samples. The size is capped at
# the number of rows because sampling more rows than exist (without
# replacement) raises a ValueError.
SAMPLE_SIZE = 50
SAMPLE_SEED = 42

sample = df.sample( n=min(SAMPLE_SIZE, len(df)), random_state=SAMPLE_SEED )

In [None]:
# Collect the (label, tile) tuples of all sampled rows. Processing stays
# best-effort - a failing row is skipped - but failures are recorded instead
# of being silently dropped.
combined_data = []
failed_rows = []  # (positional index in `sample`, exception) per skipped row

for i in tqdm(range(sample.shape[0])):
    row = sample.iloc[i]

    try:
        combined_data.extend( process_sample( row, dotenv ) )
    except Exception as err:
        # `Exception` instead of a bare except: KeyboardInterrupt still stops the loop.
        failed_rows.append( (i, err) )

if failed_rows:
    print( f"{len(failed_rows)} of {sample.shape[0]} samples failed; first error: {failed_rows[0][1]!r}" )

  4%|██▍                                                         | 2/50 [00:00<00:13,  3.51it/s]

float32


  6%|███▌                                                        | 3/50 [00:01<00:18,  2.48it/s]

float32


  8%|████▊                                                       | 4/50 [00:01<00:24,  1.86it/s]

float32


 12%|███████▏                                                    | 6/50 [00:02<00:19,  2.30it/s]

float32


 28%|████████████████▌                                          | 14/50 [00:09<00:29,  1.23it/s]

float32


 38%|██████████████████████▍                                    | 19/50 [00:11<00:15,  1.96it/s]

float32


 44%|█████████████████████████▉                                 | 22/50 [00:13<00:15,  1.79it/s]

float32


 54%|███████████████████████████████▊                           | 27/50 [00:17<00:13,  1.65it/s]

float32


 58%|██████████████████████████████████▏                        | 29/50 [00:18<00:14,  1.40it/s]

float32


 64%|█████████████████████████████████████▊                     | 32/50 [00:20<00:12,  1.44it/s]

float32


 78%|█████████████████████████████████████████████▏            | 39/50 [10:33<32:34, 177.72s/it]

In [None]:
# `data` and `labels` were not defined in any cell of this notebook; derive
# them from the (label, tile) tuples collected in `combined_data`.
# A singleton axis is inserted at position 1 because write_tiles reads each
# tile as `data[i, 0, :, :]`.
labels = np.array([label for label, _ in combined_data])
data = np.array([tile for _, tile in combined_data])[:, np.newaxis, :, :]

# 80 % train+val / 20 % test, then 20 % of the remainder as validation set.
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.20, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.20, random_state=42)

In [None]:
# Root directory below which write_tiles stores the exported tiles
# (relative path, i.e. resolved against the current working directory).
DATASET_DIR = Path('./dataset/')

In [None]:
def write_tiles( data, labels, set_name ):
    """Write every tile of one split as a JPEG below ``DATASET_DIR / set_name``.

    Tiles whose label equals 0 go to the ``no_moire`` sub-directory, all others
    to ``moire``. Files are named after the tile's index in ``data``, zero
    padded to four digits. Missing directories are created.

    Parameters
    ----------
    data : array indexed as ``data[i, 0, :, :]``; the values are multiplied by
        255 and cast to uint8 (assumes values in [0, 1] - TODO confirm).
    labels : sequence with one label per tile, indexed like ``data``.
    set_name : name of the split, used as directory name (e.g. ``'train'``).
    """
    set_dir = DATASET_DIR / set_name
    # parents=True: DATASET_DIR itself may not exist yet on a fresh checkout.
    set_dir.mkdir( parents=True, exist_ok=True )

    for i in tqdm(range(data.shape[0])):
        parent_dir_name = 'no_moire' if labels[i] == 0 else 'moire'
        parent_dir = set_dir / parent_dir_name
        parent_dir.mkdir( parents=True, exist_ok=True )

        out_path = parent_dir / f"{str(i).zfill(4)}.jpg"

        # Read from the `data` argument - not from the global X_train - so that
        # each split is written with its own tiles.
        img = Image.fromarray( np.uint8(data[i,0,:,:] * 255) ).convert('RGB')
        img.save( out_path, progressive=True )

In [None]:
# Export each split into its own sub-directory, in the order train, test, val.
for set_name, tiles, tile_labels in (
    ('train', X_train, y_train),
    ('test', X_test, y_test),
    ('val', X_val, y_val),
):
    write_tiles( tiles, tile_labels, set_name )