## TO-DO 
Для начала поработаем с маленьким датасетом. Пусть будет 20 разных фотографий: 20 фотографий × 2 режима (awgn и bayer) × 15 значений sigma = 600 изображений суммарно на трейн.
Каждая фотография предобрабатывается заранее; чтобы увеличить датасет и снизить риск переобучения (overfit), добавлю random crop.
1. Для начала попробую самый простой вариант: взять первые train_size изображений из raw_images.

In [25]:
import cv2
import matplotlib.pyplot as plt
import numpy as np
import os

from colour_demosaicing import (
    EXAMPLES_RESOURCES_DIRECTORY,
    demosaicing_CFA_Bayer_bilinear,
    demosaicing_CFA_Bayer_Malvar2004,
    demosaicing_CFA_Bayer_Menon2007,
    mosaicing_CFA_Bayer,
    masks_CFA_Bayer
)

In [28]:
class DataPreparation:
    """Generate noisy grayscale training images from a directory of raw images.

    For every selected source image and every noise level ``sigma`` in
    ``1..max_sigma`` two variants are produced:

    * ``awgn``  - the image is read as grayscale and additive white Gaussian
      noise is applied to it directly;
    * ``bayer`` - the colour image is mosaiced with a GRBG Bayer pattern,
      noise is added to the mosaic, the result is demosaiced (Menon 2007)
      and averaged over the colour channels.

    Every result is resized to 512x512 and written to ``<data_dir>/<mode>/``
    as ``<sigma>_<original file name>``. The whole pipeline runs as part of
    construction.
    """

    def __init__(self, max_sigma, data_size, in_dir='raw_images/', data_dir='data/'):
        """Store the settings and immediately build the dataset.

        Args:
            max_sigma: Largest noise standard deviation; the integer levels
                ``1..max_sigma`` are all generated.
            data_size: Maximum number of source images to take from ``in_dir``.
            in_dir: Directory holding the source images.
            data_dir: Root directory that receives the ``awgn/`` and
                ``bayer/`` sub-directories.
        """
        self.max_sigma = max_sigma
        self.data_size = data_size
        self.in_dir = in_dir
        self.data_dir = data_dir

        self.make_data(in_dir, data_dir, data_size)
        print("Data has been prepared, check ", data_dir)

    @staticmethod
    def _list_images(in_dir, size):
        """Return the first ``size`` non-hidden entries of ``in_dir``, sorted by name."""
        # Hidden files are dropped BEFORE slicing so they do not consume slots
        # in ``size``; sorting makes the selection reproducible because
        # os.listdir returns entries in arbitrary order.
        visible = sorted(
            name for name in os.listdir(in_dir) if not name.startswith('.')
        )
        return visible[:size]

    def make_data(self, in_dir, out_dir, size):
        """Create the ``awgn`` and ``bayer`` variants of the source images.

        Args:
            in_dir: Directory to read source images from.
            out_dir: Root output directory; ``<out_dir>/<mode>/`` is created
                when it does not exist yet.
            size: Maximum number of source images to process.

        Files that OpenCV cannot decode are reported and skipped.
        """
        sigmas = np.linspace(1, self.max_sigma, self.max_sigma)
        # The same image list is shared by both modes so that they are built
        # from identical sources.
        image_names = self._list_images(in_dir, size)
        total = len(image_names) * len(sigmas)

        for mode in ['awgn', 'bayer']:
            save_path = os.path.join(out_dir, mode)
            # cv2.imwrite only returns False when the target directory is
            # missing, so make sure it exists up front.
            os.makedirs(save_path, exist_ok=True)
            # 'awgn' works on a grayscale image, 'bayer' needs all 3 channels.
            read_flag = cv2.IMREAD_GRAYSCALE if mode == 'awgn' else cv2.IMREAD_COLOR

            counter = 0
            for image_name in image_names:
                image_path = os.path.join(in_dir, image_name)
                # Read once per image; the noise helpers never modify their input.
                image = cv2.imread(image_path, read_flag)
                if image is None:
                    print(f"{mode}: skipping unreadable file {image_path}")
                    counter += len(sigmas)
                    continue

                for sigma in sigmas:
                    # ``sigma`` is a numpy float, so names look like
                    # "1.0_<name>"; kept unchanged for existing consumers.
                    new_image_path = os.path.join(save_path, f'{sigma}_{image_name}')
                    if mode == 'awgn':
                        new_image = self.get_awgn_image(image, sigma)
                    else:
                        new_image = self.mosaic_awgn_demosaic(image, sigma)
                    self.save_image(new_image_path, new_image)
                    print(f"{mode}: {counter} / {total}")
                    counter += 1

    def get_rgb_masks(self, shape):
        """Return the ``(r, g, b)`` 0/1 masks of a GRBG Bayer pattern.

        Args:
            shape: ``(height, width)`` of the masks.

        Returns:
            Three float arrays of the given shape. Each 2x2 tile is laid out as
            ``G R`` on the even row and ``B G`` on the odd row.
        """
        # GRBG
        g = np.zeros(shape)
        g[::2, ::2] = 1
        g[1::2, 1::2] = 1

        b = np.zeros(shape)
        b[1::2, ::2] = 1

        r = np.zeros(shape)
        r[::2, 1::2] = 1

        return r, g, b

    def mosaic(self, image):
        """Sample a 3-channel image onto a single-channel GRBG Bayer mosaic.

        Args:
            image: ``(H, W, 3)`` array whose channels are ordered B, G, R
                (the order ``cv2.imread`` returns).

        Returns:
            ``(H, W)`` float array holding, per pixel, the value of the one
            channel selected by the Bayer pattern.
        """
        h, w = image.shape[0], image.shape[1]
        # Must be looked up on ``self``: a bare ``get_rgb_masks`` is not
        # defined at module level and raises NameError.
        r_mask, g_mask, b_mask = self.get_rgb_masks((h, w))
        # BGR channel order
        blue, green, red = image[:, :, 0], image[:, :, 1], image[:, :, 2]
        return blue * b_mask + green * g_mask + red * r_mask

    def mosaic_awgn_demosaic(self, image, sigma):
        """Mosaic ``image``, add Gaussian noise, demosaic and convert to gray.

        Args:
            image: ``(H, W, 3)`` BGR image.
            sigma: Standard deviation of the noise added to the mosaic.

        Returns:
            ``(H, W)`` float array: the per-pixel mean of the demosaiced
            channels.
        """
        mosaic_im = self.mosaic(image)
        noisy_mosaic_im = self.get_awgn_image(mosaic_im, sigma)
        demosaic_noisy_im = demosaicing_CFA_Bayer_Menon2007(noisy_mosaic_im, 'GRBG')
        # Grayscale is the plain channel mean, which does not depend on the
        # channel order, so no RGB<->BGR reordering is needed first.
        return np.mean(demosaic_noisy_im, axis=2)

    def save_image(self, path, image):
        """Resize ``image`` to 512x512 and write it to ``path``.

        Returns:
            The result of ``cv2.imwrite`` for the write.
        """
        # NOTE(review): 'bayer' images arrive here as float arrays; confirm
        # that cv2.imwrite's handling of float input is the intended conversion.
        resized_im = cv2.resize(image, (512, 512))
        return cv2.imwrite(path, resized_im)

    def get_awgn_image(self, image, scale, loc=0.0):
        """Add Gaussian noise to ``image`` and return it as ``uint8``.

        Args:
            image: Array of pixel values.
            scale: Standard deviation of the noise.
            loc: Mean of the noise.

        Returns:
            New ``uint8`` array of the same shape with values clipped to
            ``[0, 255]``; the input array is left untouched.
        """
        noise = np.random.normal(loc=loc, scale=scale, size=image.shape)
        noisy_image = np.uint8(np.clip(image + noise, 0, 255))
        return noisy_image

In [29]:
%%time
# Constructing DataPreparation runs the whole pipeline: noise levels sigma = 1..15
# are applied to at most 30 entries of raw_images/, and the results are written
# under data/awgn/ and data/bayer/ (the constructor's default directories).
dataprep = DataPreparation(max_sigma=15, data_size=30)

awgn: 0 / 450
awgn: 1 / 450
awgn: 2 / 450
awgn: 3 / 450
awgn: 4 / 450
awgn: 5 / 450
awgn: 6 / 450
awgn: 7 / 450
awgn: 8 / 450
awgn: 9 / 450
awgn: 10 / 450
awgn: 11 / 450
awgn: 12 / 450
awgn: 13 / 450
awgn: 14 / 450
awgn: 15 / 450
awgn: 16 / 450
awgn: 17 / 450
awgn: 18 / 450
awgn: 19 / 450
awgn: 20 / 450
awgn: 21 / 450
awgn: 22 / 450
awgn: 23 / 450
awgn: 24 / 450
awgn: 25 / 450
awgn: 26 / 450
awgn: 27 / 450
awgn: 28 / 450
awgn: 29 / 450
awgn: 30 / 450
awgn: 31 / 450
awgn: 32 / 450
awgn: 33 / 450
awgn: 34 / 450
awgn: 35 / 450
awgn: 36 / 450
awgn: 37 / 450
awgn: 38 / 450
awgn: 39 / 450
awgn: 40 / 450
awgn: 41 / 450
awgn: 42 / 450
awgn: 43 / 450
awgn: 44 / 450
awgn: 45 / 450
awgn: 46 / 450
awgn: 47 / 450
awgn: 48 / 450
awgn: 49 / 450
awgn: 50 / 450
awgn: 51 / 450
awgn: 52 / 450
awgn: 53 / 450
awgn: 54 / 450
awgn: 55 / 450
awgn: 56 / 450
awgn: 57 / 450
awgn: 58 / 450
awgn: 59 / 450
awgn: 60 / 450
awgn: 61 / 450
awgn: 62 / 450
awgn: 63 / 450
awgn: 64 / 450
awgn: 65 / 450
awgn: 66 / 450
awgn:

bayer: 70 / 450
bayer: 71 / 450
bayer: 72 / 450
bayer: 73 / 450
bayer: 74 / 450
bayer: 75 / 450
bayer: 76 / 450
bayer: 77 / 450
bayer: 78 / 450
bayer: 79 / 450
bayer: 80 / 450
bayer: 81 / 450
bayer: 82 / 450
bayer: 83 / 450
bayer: 84 / 450
bayer: 85 / 450
bayer: 86 / 450
bayer: 87 / 450
bayer: 88 / 450
bayer: 89 / 450
bayer: 90 / 450
bayer: 91 / 450
bayer: 92 / 450
bayer: 93 / 450
bayer: 94 / 450
bayer: 95 / 450
bayer: 96 / 450
bayer: 97 / 450
bayer: 98 / 450
bayer: 99 / 450
bayer: 100 / 450
bayer: 101 / 450
bayer: 102 / 450
bayer: 103 / 450
bayer: 104 / 450
bayer: 105 / 450
bayer: 106 / 450
bayer: 107 / 450
bayer: 108 / 450
bayer: 109 / 450
bayer: 110 / 450
bayer: 111 / 450
bayer: 112 / 450
bayer: 113 / 450
bayer: 114 / 450
bayer: 115 / 450
bayer: 116 / 450
bayer: 117 / 450
bayer: 118 / 450
bayer: 119 / 450
bayer: 120 / 450
bayer: 121 / 450
bayer: 122 / 450
bayer: 123 / 450
bayer: 124 / 450
bayer: 125 / 450
bayer: 126 / 450
bayer: 127 / 450
bayer: 128 / 450
bayer: 129 / 450
bayer: 130