In [45]:
from tensorflow import keras
from pathlib import Path
from shutil import copy
from imageio import imread
from sklearn.feature_extraction import image
import numpy as np

# Locations of the raw dataset download, relative to the notebook's working directory.
images_dir = Path('Onera Satellite Change Detection dataset - Images/')
labels_dir = Path('Onera Satellite Change Detection dataset - Train Labels/')

# Parent folder of the new data structure. `exist_ok=True` keeps the cell
# idempotent on re-runs without a separate exists() check (which is racy and
# would silently accept a regular file sitting at this path).
preprocessed_dir = Path('preprocessed/')
preprocessed_dir.mkdir(parents=True, exist_ok=True)

In [29]:
# loading all raw data into new structure
def loading_data(dataset):
    """Copy the raw image pairs and change maps of one split into `preprocessed_dir`.

    The comma-separated city list is read from ``images_dir / f'{dataset}.txt'``.
    For every city three files are copied (the sources are left in place):

    * ``images_dir/<city>/pair/img1.png`` -> ``preprocessed_dir/<dataset>/pre/<city>_pre.png``
    * ``images_dir/<city>/pair/img2.png`` -> ``preprocessed_dir/<dataset>/post/<city>_post.png``
    * ``labels_dir/<city>/cm/cm.png``     -> ``preprocessed_dir/<dataset>/labels/<city>_label.png``

    Parameters
    ----------
    dataset : str
        Name of the split; used both as the list file stem and as the output
        sub-folder name.

    Returns
    -------
    list of str
        The city names, in the order they appear in the list file.

    Raises
    ------
    FileNotFoundError
        If the list file or one of the source PNGs does not exist.
    """
    split_dir = preprocessed_dir / dataset
    pre_images_dir = split_dir / 'pre'
    post_images_dir = split_dir / 'post'
    clabels_dir = split_dir / 'labels'
    for target_dir in (pre_images_dir, post_images_dir, clabels_dir):
        target_dir.mkdir(parents=True, exist_ok=True)

    with open(images_dir / f'{dataset}.txt') as f:
        # Strip every entry so stray spaces or a trailing '\r\n' do not end up
        # in the file names, and drop empty entries (e.g. from an empty file
        # or a trailing comma).
        cities = [name.strip() for name in f.read().split(',') if name.strip()]
    print(cities)

    def copy_city_files(city):
        # (source, destination) pairs for one city; files are renamed on copy
        image_pair_dir = images_dir / city / 'pair'
        transfers = [
            (image_pair_dir / 'img1.png', pre_images_dir / f'{city}_pre.png'),
            (image_pair_dir / 'img2.png', post_images_dir / f'{city}_post.png'),
            (labels_dir / city / 'cm' / 'cm.png', clabels_dir / f'{city}_label.png'),
        ]
        for source, destination in transfers:
            copy(str(source), str(destination))

    for city in cities:
        copy_city_files(city)

    return cities
    
    
# Reorganise the 'train' split and keep its city list for the cells below.
cities = loading_data(dataset='train')

['aguasclaras', 'bercy', 'bordeaux', 'nantes', 'paris', 'rennes', 'saclay_e', 'abudhabi', 'cupertino', 'pisa', 'beihai', 'hongkong', 'beirut', 'mumbai']


In [54]:
def _load_city_stack(root_dir, city):
    """Load one city's pre image, post image and label raster as one (H, W, 7) array.

    Channels 0-2 are the first three bands of the pre image, channels 3-5 the
    first three bands of the post image, and channel 6 is the first band of the
    label image. Every channel is divided by 255.
    """
    pre = imread(root_dir / 'pre' / f'{city}_pre.png')[:, :, :3] / 255
    post = imread(root_dir / 'post' / f'{city}_post.png')[:, :, :3] / 255
    change_map = imread(root_dir / 'labels' / f'{city}_label.png')[:, :, 0] / 255

    # Stacking images and label together guarantees that a sampled patch
    # covers the same pixels in all three rasters.
    return np.dstack((pre, post, change_map))


def sample_data(root_dir, cities, fraction=1e-3, patch_size=(15,15), random_state=None):
    """Randomly sample co-registered pre/post patches and their centre label.

    Parameters
    ----------
    root_dir : pathlib.Path
        Folder containing the 'pre', 'post' and 'labels' sub-folders.
    cities : iterable of str
        City names; files are looked up as '<city>_pre.png', '<city>_post.png'
        and '<city>_label.png'.
    fraction : float or int, optional
        Forwarded to scikit-learn's ``extract_patches_2d`` as ``max_patches``.
        The default of 1e-3 is the rate this function has always applied
        (the argument used to be ignored). NOTE(review): per the scikit-learn
        docs a float is a proportion of all possible patches, so a very small
        value can round down to zero patches for an image - confirm when
        lowering it.
    patch_size : tuple of int, optional
        (height, width) of the patches.
    random_state : int, RandomState instance or None, optional
        Forwarded to ``extract_patches_2d``; pass an int for reproducible
        sampling.

    Returns
    -------
    list of dict
        One dict per patch with keys 'pre' and 'post' (patch_size + (3,)
        arrays), 'label' (value of the label channel at the patch centre)
        and 'city'.
    """
    # Centre pixel of the patch; for the default 15x15 patch this is (7, 7).
    center_row, center_col = patch_size[0] // 2, patch_size[1] // 2

    samples = []
    for city in cities:
        stack = _load_city_stack(root_dir, city)

        patches = image.extract_patches_2d(stack, tuple(patch_size),
                                           max_patches=fraction,
                                           random_state=random_state)
        print(patches.shape)

        for patch in patches:
            samples.append({
                'pre': patch[:, :, :3],
                'post': patch[:, :, 3:6],
                'label': patch[center_row, center_col, 6],
                'city': city,
            })

    return samples

root_dir = Path('preprocessed/train/')
# fraction is honoured now; 1e-3 reproduces the sampling rate used so far.
samples = sample_data(root_dir, cities, fraction=1e-3, patch_size=(15,15))
print(len(samples))


(233, 15, 15, 7)
(131, 15, 15, 7)
(224, 15, 15, 7)
(288, 15, 15, 7)
(148, 15, 15, 7)
(178, 15, 15, 7)
(421, 15, 15, 7)
(605, 15, 15, 7)
(774, 15, 15, 7)
(536, 15, 15, 7)
(673, 15, 15, 7)
(358, 15, 15, 7)
(1231, 15, 15, 7)
(458, 15, 15, 7)
6258


In [55]:
def get_siamese_model(input_shape):
    """
        Build the two-input (siamese) network.

        Both inputs are passed through one shared convolutional encoder. The
        element-wise absolute difference of the two encodings is fed to a
        single sigmoid unit, whose output is the model's prediction.

        NOTE(review): assuming Keras' default 'valid' padding and 2x2 pooling,
        the 10x10 -> pool -> 7x7 sequence below shrinks the feature map very
        quickly; confirm that `input_shape` is large enough (a 15x15 patch
        would not survive the 7x7 convolution).
    """

    # One placeholder tensor per image of the pair
    left_input = Input(input_shape)
    right_input = Input(input_shape)

    # Shared encoder, declared as a single layer list
    encoder = Sequential([
        Conv2D(64, (10,10), activation='relu', input_shape=input_shape,
               kernel_initializer=initialize_weights, kernel_regularizer=l2(2e-4)),
        MaxPooling2D(),
        Conv2D(128, (7,7), activation='relu',
               kernel_initializer=initialize_weights,
               bias_initializer=initialize_bias, kernel_regularizer=l2(2e-4)),
        MaxPooling2D(),
        Conv2D(128, (4,4), activation='relu', kernel_initializer=initialize_weights,
               bias_initializer=initialize_bias, kernel_regularizer=l2(2e-4)),
        MaxPooling2D(),
        Conv2D(256, (4,4), activation='relu', kernel_initializer=initialize_weights,
               bias_initializer=initialize_bias, kernel_regularizer=l2(2e-4)),
        Flatten(),
        Dense(4096, activation='sigmoid',
              kernel_regularizer=l2(1e-3),
              kernel_initializer=initialize_weights, bias_initializer=initialize_bias),
    ])

    # The same encoder instance (hence the same weights) encodes both inputs
    encoded_l = encoder(left_input)
    encoded_r = encoder(right_input)

    # Element-wise absolute difference between the two feature vectors
    abs_difference = Lambda(lambda pair: K.abs(pair[0] - pair[1]))([encoded_l, encoded_r])

    # Single sigmoid unit turning the difference vector into the output score
    prediction = Dense(1, activation='sigmoid', bias_initializer=initialize_bias)(abs_difference)

    # Tie the two inputs to the prediction and hand the model back
    return Model(inputs=[left_input, right_input], outputs=prediction)

In [24]:
class DataGeneratorSiamese(keras.utils.Sequence):
    """Batch generator feeding (pre, post) patch pairs to a two-input Keras model.

    `samples` is a list of dicts with the keys written by `sample_data`:
    'pre' and 'post' (image patches of shape `input_size`) and 'label'
    (a scalar). Each batch is ``([X_pre, X_post], y)``.
    """
    def __init__(self, samples, batch_size=32, input_size=(15,15,3), shuffle=True):
        """Store the configuration and create the first index order.

        Parameters
        ----------
        samples : list of dict
            Patch samples with keys 'pre', 'post' and 'label'.
        batch_size : int
            Number of samples per batch.
        input_size : tuple of int
            Shape of a single patch, e.g. (height, width, channels).
        shuffle : bool
            Whether to reshuffle the sample order at the end of every epoch.
        """
        super().__init__()
        self.samples = samples
        self.batch_size = batch_size
        self.input_size = input_size
        self.patch_size = (input_size[0], input_size[1])
        self.shuffle = shuffle
        self.on_epoch_end()

    def on_epoch_end(self):
        """Reset the sample order and shuffle it if requested."""
        self.indexes = np.arange(len(self.samples))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __len__(self):
        """Number of full batches per epoch (a trailing partial batch is dropped)."""
        return len(self.samples) // self.batch_size

    def __getitem__(self, index):
        """Return batch number `index` as ``([X_pre, X_post], y)``."""
        batch_indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        return self.__data_generation(batch_indexes)

    def __data_generation(self, indexes):
        """Assemble the arrays for the samples selected by `indexes`.

        The batch is built from `self.samples`; the previous version read
        `self.image_files`, `self.label_files` and `self.n_classes`, which this
        class never sets, so every batch request raised AttributeError.
        """
        # Size by the actual number of indexes so no uninitialised rows from
        # np.empty can leak into a short batch.
        n_batch = len(indexes)
        X_pre = np.empty((n_batch, *self.input_size), dtype=np.float32)
        X_post = np.empty((n_batch, *self.input_size), dtype=np.float32)
        y = np.empty((n_batch,), dtype=np.float32)

        for row, sample_index in enumerate(indexes):
            sample = self.samples[sample_index]
            X_pre[row] = sample['pre']
            X_post[row] = sample['post']
            y[row] = sample['label']

        # Two inputs in the order (pre, post); y is the scalar label per pair,
        # which fits a single sigmoid output trained with binary cross-entropy.
        return [X_pre, X_post], y

        

def patch_sample(image_file, label_file, patch_size):
    """Draw one random patch from an image and derive its label from the label raster.

    Parameters
    ----------
    image_file, label_file : path-like
        Image and label files; they are read with `imread` and must have the
        same height and width so they can be stacked.
    patch_size : tuple of int
        (height, width) of the patch.

    Returns
    -------
    image_patch : numpy.ndarray
        Patch of shape patch_size + (3,), values divided by 255.
    label : int or float
        For an even patch height: 1 if at least two of the four central label
        pixels are set, else 0. For an odd patch height: the value of the
        central label pixel.
    """
    patch_nr = 1  # exactly one patch is drawn per call

    # `imread` and the scikit-learn `image` module are the names this notebook
    # imports; `imageio.imread` / a bare `extract_patches_2d` are not in scope.
    img = imread(image_file)[:, :, :3] / 255      # drop alpha channel, rescale
    label_map = imread(label_file)[:, :, 0] / 255  # first band only, rescale

    # Stack image and label so the sampled patch is aligned in both
    labeled_img = np.dstack((img, label_map))
    labeled_img_patches = image.extract_patches_2d(labeled_img, patch_size=patch_size,
                                                   max_patches=patch_nr)

    image_patch = labeled_img_patches[0, :, :, :3]
    label_patch = labeled_img_patches[0, :, :, 3]

    height, width = patch_size
    if height % 2 == 0:
        # No single centre pixel: vote over the central 2x2 block
        top, left = height // 2 - 1, width // 2 - 1
        n_positive = np.sum(label_patch[top:top+2, left:left+2])
        label = 1 if n_positive >= 2 else 0
    else:
        # patch_size is a tuple, so index it before the floor division
        label = label_patch[height // 2, width // 2]

    return image_patch, label


def sophisticated_pick(label_patches):
    """Unfinished placeholder for a label-aware patch selection; returns None.

    Parameters
    ----------
    label_patches : sequence or numpy.ndarray
        Label patches, iterated along the first axis.

    Returns
    -------
    None
        No selection logic has been implemented yet.
    """
    # The sketch previously read an undefined name (`image_patches`) and failed
    # with NameError; it now iterates over the argument it actually receives.
    likelihoods = []  # meant to collect one score per patch; not filled yet
    for patch in label_patches:
        # TODO: derive a pick likelihood from `patch` and append it to `likelihoods`
        pass

    return None

def augmentation(image_patch):
    """Placeholder for patch augmentation: ignores `image_patch` and returns None."""
    return None

In [None]:
# dataloader test

In [None]:
# Must match the patches stored in `samples` (15x15 pixels, 3 bands each for
# the pre and the post image), otherwise the batches cannot be assembled.
INPUT_SIZE = (15, 15, 3)
N_CLASSES = 2
BATCH_SIZE = 32

# File listings of a per-file data layout; not consumed by the siamese
# generator below, which works on the in-memory `samples` list.
dir_images = Path('data/preprocessed/images/')
dir_labels = Path('data/preprocessed/labels/')

image_files = [obj for obj in Path(dir_images).glob('**/*') if obj.is_file()]
label_files = [obj for obj in Path(dir_labels).glob('**/*') if obj.is_file()]

# Use the generator and model builder that this notebook actually defines
# (`DataGenerator` / `get_siamese_network` do not exist here).
training_generator = DataGeneratorSiamese(samples, batch_size=BATCH_SIZE, input_size=INPUT_SIZE,
                                          shuffle=True)

# NOTE(review): the convolution/pooling sizes in get_siamese_model look too
# large for 15x15 inputs - confirm the architecture before training.
model = get_siamese_model(INPUT_SIZE)


# `keras` is the name imported at the top of the notebook (`tf` is not), and
# `learning_rate` is the current name of the deprecated `lr` argument.
model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.00006),
              loss='binary_crossentropy',
              metrics=['accuracy'])


# Train model on dataset
# (`workers=1` / `use_multiprocessing=False` were only restating the defaults)
model.fit(x=training_generator, verbose=1, epochs=5)

In [44]:
from sklearn.datasets import load_sample_image
from sklearn.feature_extraction import image
# Quick check of extract_patches_2d on one of scikit-learn's bundled sample images.
one_image = load_sample_image("china.jpg")
print(f'Image shape: {one_image.shape}')
# No max_patches is given, so all 2x2 patches of the image are extracted.
patches = image.extract_patches_2d(one_image, patch_size=(2, 2))
print(f'Patches shape: {patches.shape}')
# Show the patch at index 1 as an example:
print(patches[1])

Image shape: (427, 640, 3)
Patches shape: (272214, 2, 2, 3)
[[[174 201 231]
  [174 201 231]]

 [[173 200 230]
  [173 200 230]]]
