## TO-DO 
Для начала поработаем с маленьким датасетом. Пусть будет 20 разных фотографий -> 20 * 2 * 15 = 600 фотографий суммарно на трейн.
Каждая фотография предобрабатывается заранее; чтобы увеличить датасет и снизить риск переобучения (overfit), добавлю random crop.
1. Для начала попробую самый простой вариант: взять первые train_size изображений из raw_images.

In [1]:
import cv2
import matplotlib.pyplot as plt
import numpy as np
import os

from colour_demosaicing import (
    EXAMPLES_RESOURCES_DIRECTORY,
    demosaicing_CFA_Bayer_bilinear,
    demosaicing_CFA_Bayer_Malvar2004,
    demosaicing_CFA_Bayer_Menon2007,
    mosaicing_CFA_Bayer,
    masks_CFA_Bayer
)

In [13]:
class DataPreparation:
    """Generate noisy grayscale images for train / validation / test splits.

    Every source image is turned into ``max_sigma`` noisy variants (noise
    standard deviation 1, 2, ..., ``max_sigma``) in two modes:

    * ``awgn``  - Gaussian noise added directly to the grayscale image;
    * ``bayer`` - the colour image is mosaiced with a GRBG pattern, noise is
      added to the mosaic, the result is demosaiced (Menon 2007) and the
      channels are averaged into a single grayscale plane.

    Results are written to ``<data_dir>/<split>/<mode>/<sigma>_<image_name>``.
    Constructing the object immediately builds the train and validation
    splits; the test split is only built when ``make_test`` is called.
    """

    # Sub-folder names, one per noise-generation mode.
    MODES = ('awgn', 'bayer')
    # Train and validation images are resized to this (width, height).
    PATCH_SIZE = (64, 64)

    def __init__(self, max_sigma, train_size, val_size, test_size, in_dir='raw_images/', data_dir='data/'):
        """Store the configuration and build the train and validation sets.

        Args:
            max_sigma: largest noise standard deviation; must be an int
                because it is also used as the number of sigma levels.
            train_size: number of source images in the train split.
            val_size: number of source images in the validation split.
            test_size: number of source images in the test split.
            in_dir: directory holding the source images.
            data_dir: root directory for the generated data.
        """
        self.max_sigma = max_sigma
        self.train_size = train_size
        self.in_dir = in_dir
        self.data_dir = data_dir
        self.val_size = val_size
        self.test_size = test_size

        self.make_train(in_dir, os.path.join(data_dir, "train/"), self.train_size)
        self.make_val(in_dir, os.path.join(data_dir, "val/"), self.val_size)
        # The test split is not generated automatically; call make_test().
        print("Data has been prepared, check ", data_dir)

    def make_train(self, in_dir, out_dir, size):
        """Build the train split from the first ``size`` images of ``in_dir``."""
        print("============================ TRAIN ==========================")
        self._make_split(in_dir, out_dir, 0, size, size, resize_to=self.PATCH_SIZE)

    def make_val(self, in_dir, out_dir, size):
        """Build the validation split from the images that follow the train ones.

        The slice is taken from ``self.train_size`` / ``self.val_size``;
        ``size`` is only used for the progress total.
        """
        print("============================ VALIDATION ==========================")
        start = self.train_size
        self._make_split(in_dir, out_dir, start, start + self.val_size, size,
                         resize_to=self.PATCH_SIZE)

    def make_test(self, in_dir, out_dir):
        """Build the test split from the images that follow train and validation.

        Unlike the other splits, test images keep their original resolution.
        """
        print("============================ TEST ==========================")
        size = self.test_size
        start = self.train_size + self.val_size
        self._make_split(in_dir, out_dir, start, start + size, size, resize_to=None)

    @staticmethod
    def _list_images(in_dir):
        """Return the non-hidden entries of ``in_dir`` in sorted order.

        ``os.listdir`` returns names in arbitrary order, so the listing is
        sorted to keep the splits reproducible and disjoint. Hidden entries
        are dropped before slicing so they do not use up split slots.
        """
        return sorted(name for name in os.listdir(in_dir) if not name.startswith('.'))

    def _make_split(self, in_dir, out_dir, start, stop, size, resize_to=None):
        """Generate both noise modes for the images ``[start:stop]`` of ``in_dir``.

        Args:
            in_dir: directory holding the source images.
            out_dir: split directory; one sub-folder per mode is created.
            start, stop: slice bounds into the sorted image listing.
            size: nominal number of images, used only for the progress total.
            resize_to: optional ``(width, height)`` to resize each image to.
        """
        sigmas = np.linspace(1, self.max_sigma, self.max_sigma)
        image_names = self._list_images(in_dir)[start:stop]
        total = size * len(sigmas)

        for mode in self.MODES:
            save_dir = os.path.join(out_dir, mode)
            # cv2.imwrite does not create missing folders, it just returns False.
            os.makedirs(save_dir, exist_ok=True)
            # awgn works on a grayscale image, bayer needs the colour channels.
            read_flag = cv2.IMREAD_GRAYSCALE if mode == 'awgn' else cv2.IMREAD_COLOR
            counter = 0

            for image_name in image_names:
                image_path = os.path.join(in_dir, image_name)
                # Read once per image; the noise helpers do not modify it.
                image = cv2.imread(image_path, read_flag)
                if image is None:
                    # Not an image (or unreadable): skip instead of crashing later.
                    print(f"{mode}: cannot read {image_path}, skipped")
                    continue
                if resize_to is not None:
                    image = cv2.resize(image, resize_to)

                for sigma in sigmas:
                    new_image_path = os.path.join(save_dir, f'{sigma}_{image_name}')
                    if mode == 'awgn':
                        new_image = self.get_awgn_image(image, sigma)
                    else:
                        new_image = self.mosaic_awgn_demosaic(image, sigma)

                    if not self.save_image(new_image_path, new_image):
                        print(f"{mode}: failed to write {new_image_path}")
                    print(f"{mode}: {counter} / {total}")
                    counter += 1

    def get_rgb_masks(self, shape):
        """Return the ``(r, g, b)`` 0/1 masks of a GRBG Bayer pattern.

        Args:
            shape: ``(height, width)`` of the masks.

        Returns:
            Three float arrays of the given shape, in the order red, green, blue.
        """
        # GRBG: even rows are G R G R ..., odd rows are B G B G ...
        g = np.zeros(shape)
        g[::2, ::2] = 1
        g[1::2, 1::2] = 1

        b = np.zeros(shape)
        b[1::2, ::2] = 1

        r = np.zeros(shape)
        r[::2, 1::2] = 1

        return r, g, b

    def mosaic(self, image):
        """Sample a 3-channel image into a single-plane GRBG mosaic.

        Args:
            image: ``(height, width, 3)`` array with channels in BGR order
                (the order used by ``cv2.imread``).

        Returns:
            A ``(height, width)`` float array keeping one channel per pixel.
        """
        h, w = image.shape[0], image.shape[1]
        r_mask, g_mask, b_mask = self.get_rgb_masks((h, w))
        # Channel order is BGR.
        blue, green, red = image[:, :, 0], image[:, :, 1], image[:, :, 2]
        return blue * b_mask + green * g_mask + red * r_mask

    def mosaic_awgn_demosaic(self, image, sigma):
        """Mosaic the image, add noise to the mosaic, demosaic, and go gray.

        Args:
            image: ``(height, width, 3)`` BGR image.
            sigma: standard deviation of the Gaussian noise.

        Returns:
            A ``(height, width)`` floating-point array: the per-pixel mean of
            the three demosaiced channels.
        """
        mosaic_im = self.mosaic(image)
        noisy_mosaic_im = self.get_awgn_image(mosaic_im, sigma)
        demosaic_noisy_im = demosaicing_CFA_Bayer_Menon2007(noisy_mosaic_im, 'GRBG')
        # Reverse the channel order (presumably RGB -> BGR; the mean below
        # does not depend on it).
        bgr_im = demosaic_noisy_im[:, :, [2, 1, 0]]
        # Grayscale as the plain average of the three channels.
        gray = np.mean(bgr_im, axis=2)
        return gray

    def save_image(self, path, image):
        """Write ``image`` to ``path`` and return the ``cv2.imwrite`` result."""
        return cv2.imwrite(path, image)

    def get_awgn_image(self, image, scale, loc=0.0):
        """Return ``image`` with additive white Gaussian noise.

        Args:
            image: array of pixel values on the 0..255 scale.
            scale: standard deviation of the noise.
            loc: mean of the noise.

        Returns:
            A ``uint8`` array of the same shape, clipped to ``[0, 255]``.
        """
        noise3d = np.random.normal(loc=loc, scale=scale, size=image.shape)
        noisy_image = np.uint8(np.clip(image + noise3d, 0, 255))
        return noisy_image

In [14]:
%%time
# Driver cell: constructing DataPreparation immediately generates the train
# and validation sets (the test split call is commented out in __init__).
# With max_sigma=20 there are 20 noise levels per image, so each mode
# ('awgn', 'bayer') reports a progress total of 30 * 20 = 600 for train and
# 10 * 20 = 200 for validation.
dataprep = DataPreparation(max_sigma=20, train_size=30, val_size=10, test_size=10)

awgn: 0 / 600
awgn: 1 / 600
awgn: 2 / 600
awgn: 3 / 600
awgn: 4 / 600
awgn: 5 / 600
awgn: 6 / 600
awgn: 7 / 600
awgn: 8 / 600
awgn: 9 / 600
awgn: 10 / 600
awgn: 11 / 600
awgn: 12 / 600
awgn: 13 / 600
awgn: 14 / 600
awgn: 15 / 600
awgn: 16 / 600
awgn: 17 / 600
awgn: 18 / 600
awgn: 19 / 600
awgn: 20 / 600
awgn: 21 / 600
awgn: 22 / 600
awgn: 23 / 600
awgn: 24 / 600
awgn: 25 / 600
awgn: 26 / 600
awgn: 27 / 600
awgn: 28 / 600
awgn: 29 / 600
awgn: 30 / 600
awgn: 31 / 600
awgn: 32 / 600
awgn: 33 / 600
awgn: 34 / 600
awgn: 35 / 600
awgn: 36 / 600
awgn: 37 / 600
awgn: 38 / 600
awgn: 39 / 600
awgn: 40 / 600
awgn: 41 / 600
awgn: 42 / 600
awgn: 43 / 600
awgn: 44 / 600
awgn: 45 / 600
awgn: 46 / 600
awgn: 47 / 600
awgn: 48 / 600
awgn: 49 / 600
awgn: 50 / 600
awgn: 51 / 600
awgn: 52 / 600
awgn: 53 / 600
awgn: 54 / 600
awgn: 55 / 600
awgn: 56 / 600
awgn: 57 / 600
awgn: 58 / 600
awgn: 59 / 600
awgn: 60 / 600
awgn: 61 / 600
awgn: 62 / 600
awgn: 63 / 600
awgn: 64 / 600
awgn: 65 / 600
awgn: 66 / 600
awgn:

awgn: 527 / 600
awgn: 528 / 600
awgn: 529 / 600
awgn: 530 / 600
awgn: 531 / 600
awgn: 532 / 600
awgn: 533 / 600
awgn: 534 / 600
awgn: 535 / 600
awgn: 536 / 600
awgn: 537 / 600
awgn: 538 / 600
awgn: 539 / 600
awgn: 540 / 600
awgn: 541 / 600
awgn: 542 / 600
awgn: 543 / 600
awgn: 544 / 600
awgn: 545 / 600
awgn: 546 / 600
awgn: 547 / 600
awgn: 548 / 600
awgn: 549 / 600
awgn: 550 / 600
awgn: 551 / 600
awgn: 552 / 600
awgn: 553 / 600
awgn: 554 / 600
awgn: 555 / 600
awgn: 556 / 600
awgn: 557 / 600
awgn: 558 / 600
awgn: 559 / 600
awgn: 560 / 600
awgn: 561 / 600
awgn: 562 / 600
awgn: 563 / 600
awgn: 564 / 600
awgn: 565 / 600
awgn: 566 / 600
awgn: 567 / 600
awgn: 568 / 600
awgn: 569 / 600
awgn: 570 / 600
awgn: 571 / 600
awgn: 572 / 600
awgn: 573 / 600
awgn: 574 / 600
awgn: 575 / 600
awgn: 576 / 600
awgn: 577 / 600
awgn: 578 / 600
awgn: 579 / 600
awgn: 580 / 600
awgn: 581 / 600
awgn: 582 / 600
awgn: 583 / 600
awgn: 584 / 600
awgn: 585 / 600
awgn: 586 / 600
awgn: 587 / 600
awgn: 588 / 600
awgn: 58

bayer: 421 / 600
bayer: 422 / 600
bayer: 423 / 600
bayer: 424 / 600
bayer: 425 / 600
bayer: 426 / 600
bayer: 427 / 600
bayer: 428 / 600
bayer: 429 / 600
bayer: 430 / 600
bayer: 431 / 600
bayer: 432 / 600
bayer: 433 / 600
bayer: 434 / 600
bayer: 435 / 600
bayer: 436 / 600
bayer: 437 / 600
bayer: 438 / 600
bayer: 439 / 600
bayer: 440 / 600
bayer: 441 / 600
bayer: 442 / 600
bayer: 443 / 600
bayer: 444 / 600
bayer: 445 / 600
bayer: 446 / 600
bayer: 447 / 600
bayer: 448 / 600
bayer: 449 / 600
bayer: 450 / 600
bayer: 451 / 600
bayer: 452 / 600
bayer: 453 / 600
bayer: 454 / 600
bayer: 455 / 600
bayer: 456 / 600
bayer: 457 / 600
bayer: 458 / 600
bayer: 459 / 600
bayer: 460 / 600
bayer: 461 / 600
bayer: 462 / 600
bayer: 463 / 600
bayer: 464 / 600
bayer: 465 / 600
bayer: 466 / 600
bayer: 467 / 600
bayer: 468 / 600
bayer: 469 / 600
bayer: 470 / 600
bayer: 471 / 600
bayer: 472 / 600
bayer: 473 / 600
bayer: 474 / 600
bayer: 475 / 600
bayer: 476 / 600
bayer: 477 / 600
bayer: 478 / 600
bayer: 479 / 6

bayer: 139 / 200
bayer: 140 / 200
bayer: 141 / 200
bayer: 142 / 200
bayer: 143 / 200
bayer: 144 / 200
bayer: 145 / 200
bayer: 146 / 200
bayer: 147 / 200
bayer: 148 / 200
bayer: 149 / 200
bayer: 150 / 200
bayer: 151 / 200
bayer: 152 / 200
bayer: 153 / 200
bayer: 154 / 200
bayer: 155 / 200
bayer: 156 / 200
bayer: 157 / 200
bayer: 158 / 200
bayer: 159 / 200
bayer: 160 / 200
bayer: 161 / 200
bayer: 162 / 200
bayer: 163 / 200
bayer: 164 / 200
bayer: 165 / 200
bayer: 166 / 200
bayer: 167 / 200
bayer: 168 / 200
bayer: 169 / 200
bayer: 170 / 200
bayer: 171 / 200
bayer: 172 / 200
bayer: 173 / 200
bayer: 174 / 200
bayer: 175 / 200
bayer: 176 / 200
bayer: 177 / 200
bayer: 178 / 200
bayer: 179 / 200
bayer: 180 / 200
bayer: 181 / 200
bayer: 182 / 200
bayer: 183 / 200
bayer: 184 / 200
bayer: 185 / 200
bayer: 186 / 200
bayer: 187 / 200
bayer: 188 / 200
bayer: 189 / 200
bayer: 190 / 200
bayer: 191 / 200
bayer: 192 / 200
bayer: 193 / 200
bayer: 194 / 200
bayer: 195 / 200
bayer: 196 / 200
bayer: 197 / 2