In [1]:
import numpy as np

In [2]:
# Seed NumPy's global RNG; PupsIterator draws its patch locations and flips from np.random.
np.random.seed(21)

In [3]:
import scipy.stats as st
from keras.models import Model
from keras.layers import Input, concatenate, Conv2D, MaxPooling2D, UpSampling2D, AveragePooling2D, Flatten, Dense, Lambda
from keras.optimizers import Adam, SGD
from keras.callbacks import ModelCheckpoint
from keras import backend as K
from keras.losses import mean_squared_error, mean_absolute_error, categorical_crossentropy, binary_crossentropy
from keras.preprocessing.image import Iterator
import matplotlib.pyplot as plt
import cv2
import os
import json
import pickle
%matplotlib inline

from keras.utils.np_utils import to_categorical

from keras.layers import Reshape, BatchNormalization

import tensorflow as tf
# Create a TensorFlow session and register it as the session used by the Keras backend.
sess = tf.Session()
K.set_session(sess)

Using TensorFlow backend.


In [4]:
from keras.applications.resnet50 import ResNet50, preprocess_input

In [5]:
# ResNet50 without its classification top, initialised with ImageNet weights.
# input_shape=(None, None, 3) leaves height and width unspecified.
base_model = ResNet50(include_top=False, weights='imagenet', input_shape=(None, None, 3))

In [6]:
224 + 2 * 3 # side length of a 224-pixel input after zero padding (3, 3): 3 pixels added on each side

230

In [7]:
(230 - 7)/2 + 1  # Conv1: kernel (7, 7), strides (2, 2); the following step continues from 112

112.5

In [8]:
(112 - 3)/2 + 1  # Max pooling: pool (3, 3), strides (2, 2); the following step continues from 55

55.5

In [9]:
(55 - 1)/2 + 1 # Conv block 3a: same formula with a size-1 kernel and a step of 2

28.0

In [10]:
(28 - 1)/2 + 1 # Conv block 4a: same formula; the following step continues from 14

14.5

In [11]:
(14 - 1)/2 + 1 # Conv block 5a: same formula, last downsampling step of this hand calculation

7.5

In [1]:
from math import floor

In [2]:
def get_output_size(x = 48):
    """Trace the side length of a feature map through the downsampling stages.

    Applies a zero padding of 3 on each side, then five stages that each map a
    size ``s`` to ``floor((s - k) / 2 + 1)`` with ``k`` equal to 7, 3, 1, 1, 1
    (conv1, max pooling, conv blocks 3a, 4a and 5a).

    The padded size and every un-floored intermediate size are printed, so a
    fractional value shows where flooring discards a pixel.

    :param x: Side length of the input, in pixels.
    :return: Side length after the last stage, as an integer.
    """
    padded = x + 2 * 3
    print(padded)
    size = floor(padded)

    # Window size of conv1, max pooling, and conv blocks 3a / 4a / 5a; every stage halves the size.
    for window in (7, 3, 1, 1, 1):
        unfloored = (size - window)/2 + 1
        print(unfloored)
        size = floor(unfloored)

    return size

Input sizes 1541 (→ 49), 965 (→ 31), 837 (→ 27), 261 (→ 9), 201 (→ 7), 101, 69 and 37 give the correct shape at all layers (the number in parentheses is the resulting output size).

In [5]:
69 * 2  # twice 69, one of the input sizes listed in the note above

138

In [44]:
get_output_size(965)  # prints the size after each stage for a 965-pixel input

971
483.0
241.0
121.0
61.0
31.0


31

In [340]:
# Use the second-to-last layer of the ResNet50 base as the feature output.
h = base_model.layers[-2]
# Prediction head: a single 2x2 convolution with one sigmoid-activated filter on those features.
pups_prediction = Conv2D(1, (2, 2), activation="sigmoid")(h.output)

In [341]:
# Model mapping the ResNet50 input to the output of the sigmoid head.
pups_net = Model(inputs=[base_model.input], outputs=[pups_prediction])

In [342]:
pups_net.predict(np.ones((1, 48, 48, 3))).shape  # output shape for one 48x48 input

(1, 1, 1, 1)

In [343]:
pups_net.predict(np.ones((1, 224, 224, 3))).shape  # output shape for one 224x224 input

(1, 6, 6, 1)

In [344]:
pups_net.summary()  # print the layer-by-layer summary of the model

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_12 (InputLayer)            (None, None, None, 3) 0                                            
____________________________________________________________________________________________________
zero_padding2d_6 (ZeroPadding2D) (None, None, None, 3) 0                                            
____________________________________________________________________________________________________
conv1 (Conv2D)                   (None, None, None, 64 9472                                         
____________________________________________________________________________________________________
bn_conv1 (BatchNormalization)    (None, None, None, 64 256                                          
___________________________________________________________________________________________

In [345]:
class NonValidPatch(Exception):
    """Raised when no usable patch location can be found for a sampled position."""

In [346]:
def get_block_loc(shape, x, y, target_size=(224, 224), n_blocks=(4,4), overlap=(448,448)):
    """Find the image block that fully contains a patch.

    The image is split into a grid of ``n_blocks[0]`` rows by ``n_blocks[1]``
    columns of overlapping blocks. Blocks are scanned row by row and the first
    block that contains the whole patch is returned.

    :param shape: (height, width) of the full image.
    :param x: Column of the patch's top-left corner in the full image.
    :param y: Row of the patch's top-left corner in the full image.
    :param target_size: (height, width) of the patch.
    :param n_blocks: (rows, columns) of the block grid.
    :param overlap: (vertical, horizontal) overlap between neighbouring blocks, in pixels.
    :return: ``(block_id, x_in_block, y_in_block)`` where ``block_id`` is the
        row-major index ``by * n_columns + bx`` and the coordinates are
        relative to the block's top-left corner.
    :raises NonValidPatch: If no block contains the whole patch.
    """
    h, w = shape
    n_rows, n_cols = n_blocks[0], n_blocks[1]
    w_block = (w + (n_cols - 1) * overlap[1]) // n_cols
    h_block = (h + (n_rows - 1) * overlap[0]) // n_rows

    # Last row / column covered by the patch.
    x_last = x + target_size[1] - 1
    y_last = y + target_size[0] - 1

    for by in range(n_rows):
        y_start = by * (h_block - overlap[0])
        # NOTE(review): the extra +1 presumably matches how the block images
        # were cut by the preprocessing step -- confirm against that code.
        y_end = y_start + h_block + 1
        if not (y_start <= y < y_end and y_start <= y_last < y_end):
            continue
        for bx in range(n_cols):
            x_start = bx * (w_block - overlap[1])
            x_end = x_start + w_block + 1
            if x_start <= x < x_end and x_start <= x_last < x_end:
                # Row-major id: the row index is scaled by the number of
                # COLUMNS so ids stay unique on non-square grids.
                return bx + by * n_cols, x - x_start, y - y_start
    raise NonValidPatch("Can't find block...??")

In [349]:
class PupsIterator(Iterator):
    """Iterator yielding training batches of image patches labelled pup / not pup.

    :param root_dir: Directory containing the "TrainBlock", "TrainDots" and
        "TrainShape" sub-directories read by this iterator.
    :param image_ids: Sequence of image ids to use to sample patches.
    :param class_weights: Relative sampling weight of each of the 5 sealion
        types followed by the background class (6 values). Normalized to a
        probability distribution; uniform when None or empty.
    :param n_samples_per_image: Number of patches to sample on each image.
    :param target_size: (height, width) of the patches sampled.
    :param batch_size: Number of patches sampled per batch.
    :param shuffle: Boolean, whether to shuffle the data between epochs.
    :param seed: Random seed for data shuffling.
    :param debug_dir: Optional directory. When set, the patches of every batch
        are written there as "patch_<i>.jpg" before normalization.
    :return batch_x, batch_y.
        batch_x is a (batch_size, target_size[0], target_size[1], 3) float array
        with the per-channel means subtracted.
        batch_y is a (batch_size, 1, 1, 1) int32 array holding 1 when the patch
        was sampled for sealion type 4 (pups) and 0 otherwise.
    """

    def __init__(self, root_dir, image_ids,
                 class_weights = None,
                 n_samples_per_image=160,
                 target_size=(48, 48),
                 batch_size=32, shuffle=True, seed=42, debug_dir=None):

        self.n_sealion_types = 5
        self.image_ids = image_ids
        self.root_dir = root_dir
        self.debug_dir = debug_dir
        # Normalize to use class_weights as a probability distribution.
        # `is not None` + len() instead of truthiness so NumPy arrays are accepted too.
        n_classes = self.n_sealion_types + 1
        if class_weights is not None and len(class_weights) > 0:
            weights = np.asarray(class_weights, dtype=np.float64)
            self.class_weights = weights / np.sum(weights)
        else:
            self.class_weights = np.ones(n_classes) / n_classes

        self.n_samples_per_image = n_samples_per_image
        self.target_size = target_size
        # Every image contributes n_samples_per_image virtual samples per epoch.
        self.n_indices = len(self.image_ids) * self.n_samples_per_image

        super(PupsIterator, self).__init__(self.n_indices, batch_size, shuffle, seed)

    def normalize_input(self, x_bgr):
        """Subtract the per-channel means in place and return the same array.

        :param x_bgr: Float array whose last axis holds the 3 colour channels
            (images come from cv2.imread, i.e. in BGR order).
        """
        x_bgr[..., 0] -= 103.939
        x_bgr[..., 1] -= 116.779
        x_bgr[..., 2] -= 123.68
        return x_bgr

    def denormalize_input(self, x_normed):
        """Inverse of normalize_input: add the per-channel means back in place.

        :param x_normed: Float array previously produced by normalize_input.
        :return: The same array, with the means restored.
        """
        x_normed[..., 0] += 103.939
        x_normed[..., 1] += 116.779
        x_normed[..., 2] += 123.68
        return x_normed

    def random_transform(self, im):
        """Randomly flip the patch vertically and/or horizontally (each with probability 1/2)."""
        # cv2.flip: flipCode 0 flips around the x-axis (vertical flip),
        # flipCode 1 flips around the y-axis (horizontal flip).
        flip_vertical = np.random.randint(0, 2)
        flip_horizontal = np.random.randint(0, 2)
        if flip_vertical == 1:
            im = cv2.flip(im, 0)
        if flip_horizontal == 1:
            im = cv2.flip(im, 1)
        return im

    def sample(self, shape, dots, image_id):
        """Sample one patch of the given image together with its binary label.

        :param shape: (height, width) of the full image.
        :param dots: Per sealion type, the list of (x, y) dot coordinates in the image.
        :param image_id: Id of the image, used to locate its block files.
        :return: (patch, label) where label is 1 when the patch was sampled for
            sealion type 4 (pups), else 0.

        Loops until a valid patch is found; it does not terminate if no block
        image of `image_id` can be read.
        """
        # if more than 30% of the patch is masked, reject it
        threshold_masked = 0.3

        # Set probability to 0 if some sealion type is not in the image
        current_weights = self.class_weights.copy()
        for i in range(self.n_sealion_types):
            if not dots[i]:
                current_weights[i] = 0
        current_weights /= np.sum(current_weights)

        while True:
            # Choose an output class randomly
            output_class = np.random.choice(self.n_sealion_types + 1, size=(1, ), p=current_weights)[0]

            try:
                # Sample a location, either for background or for a sealion.
                if output_class == self.n_sealion_types:
                    # avoid bg with pups in it
                    x, y = self.sample_bg(shape, dots[4], image_id)
                else:
                    x, y = self.sample_dot(shape, dots[output_class], image_id)

                # Get the corresponding image block, and (x, y) in this block.
                # The containment test must use the size of the patch actually cropped.
                bid, x, y = get_block_loc(shape, x, y, target_size=self.target_size)
            except NonValidPatch:
                continue

            uid = "{iid}_{bid}".format(iid=image_id, bid=bid)
            img = cv2.imread(os.path.join(self.root_dir, "TrainBlock", uid + ".jpg"))
            if img is None:
                continue
            img_patch = img[y:y+self.target_size[0], x:x+self.target_size[1],:]
            # The crop is silently truncated at the block border; reject short patches.
            if img_patch.shape[0] != self.target_size[0] or img_patch.shape[1] != self.target_size[1]:
                continue
            # A pixel is masked when all of its channels are 0. Counting pixels
            # (not channel values) keeps the ratio in [0, 1], matching the threshold.
            masked_pixels = np.count_nonzero(np.all(img_patch == 0, axis=-1))
            total_pixels = img_patch.shape[0] * img_patch.shape[1]
            if masked_pixels/total_pixels < threshold_masked:
                return self.random_transform(img_patch), 1 if output_class==4 else 0

    def contains_dots(self, xstart, ystart, dots, margin):
        """Tell whether any dot lies in the patch at (xstart, ystart) enlarged by `margin` on every side."""
        x1 = xstart - margin
        y1 = ystart - margin
        x2 = xstart + self.target_size[1] + margin
        y2 = ystart + self.target_size[0] + margin
        return any(x1 <= x < x2 and y1 <= y < y2 for (x, y) in dots)

    def sample_bg(self, shape, dots, image_id):
        """Sample the top-left corner of a patch that stays clear of `dots`.

        :param shape: (height, width) of the full image.
        :param dots: (x, y) coordinates the patch, enlarged by a 16-pixel margin, must not contain.
        :param image_id: Unused.
        :return: (x, y) of the patch's top-left corner.
        :raises NonValidPatch: If no dot-free location is found in 10 attempts.
        """
        margin = 16
        max_iterations = 10
        for _ in range(max_iterations):
            x = np.random.randint(0, shape[1] - self.target_size[1], size=(1,))[0]
            y = np.random.randint(0, shape[0] - self.target_size[0], size=(1,))[0]
            if not self.contains_dots(x, y, dots, margin):
                return x, y
        raise NonValidPatch("Cant' find background")

    def sample_dot(self, shape, dots, image_id):
        """Sample the top-left corner of a patch roughly centred on a random dot.

        The corner is jittered by up to target_size[0]//8 pixels around the
        position that centres the dot, then clipped to the image.

        :param shape: (height, width) of the full image.
        :param dots: Non-empty list of (x, y) dot coordinates to choose from.
        :param image_id: Unused.
        :return: (x, y) of the patch's top-left corner.
        :raises NonValidPatch: If clipping leaves no room to sample along an axis.
        """
        margin = self.target_size[0]//8

        rand_index = np.random.choice(len(dots), size=(1,))[0]
        rand_dot = dots[rand_index]

        min_x = max(0, rand_dot[0] - self.target_size[1]//2 - margin)
        max_x = min(shape[1] - self.target_size[1], rand_dot[0] - self.target_size[1]//2 + margin)

        min_y = max(0, rand_dot[1] - self.target_size[0]//2 - margin)
        max_y = min(shape[0] - self.target_size[0], rand_dot[1] - self.target_size[0]//2 + margin)

        # Clipping can invert a range near the image border; restore the order.
        if min_x > max_x:
            max_x, min_x = min_x, max_x
        if min_y > max_y:
            max_y, min_y = min_y, max_y

        if min_x == max_x or min_y == max_y:
            raise NonValidPatch()

        x = np.random.randint(min_x, max_x, size=(1,))[0]
        y = np.random.randint(min_y, max_y, size=(1,))[0]

        return x, y

    def next(self):
        """For python 2.x.
        # Returns
            The next batch, as (batch_x, batch_y).
        """
        # Keeps under lock only the mechanism which advances
        # the indexing of each batch.
        with self.lock:
            index_array, current_index, current_batch_size = next(self.index_generator)

        batch_x = np.zeros((current_batch_size, self.target_size[0], self.target_size[1], 3), dtype=K.floatx())
        batch_y = np.zeros((current_batch_size), dtype=np.int32)

        # Each index maps back to an image; one random patch is sampled from it.
        for i, j in enumerate(index_array):
            index = j // self.n_samples_per_image
            image_id = self.image_ids[index]
            # NOTE(review): pickle.load executes arbitrary code from the file;
            # only use with dot/shape files produced by our own preprocessing.
            with open(os.path.join(self.root_dir, "TrainDots", str(image_id) + ".pkl"), "rb") as pfile:
                dots = pickle.load(pfile)
            with open(os.path.join(self.root_dir, "TrainShape", str(image_id) + ".pkl"), "rb") as pfile:
                shape = pickle.load(pfile)

            x, y = self.sample(shape, dots, image_id)
            batch_x[i, ...] = x
            batch_y[i] = y

        # Dump the raw (un-normalized) patches so they can be inspected as images.
        if self.debug_dir:
            for i in range(current_batch_size):
                cv2.imwrite(os.path.join(self.debug_dir, "patch_{}.jpg".format(i)), batch_x[i])

        # Labels are reshaped to match the (batch, 1, 1, 1) output of the network on target_size patches.
        return self.normalize_input(batch_x), batch_y.reshape((-1, 1, 1, 1))

In [350]:
# Image ids of the training split, converted to integers.
with open("../data/sealion/train.json", "r") as jfile:
    train_ids = list(map(int, json.load(jfile)))

# Image ids of the validation split, converted to integers.
with open("../data/sealion/val.json", "r") as jfile:
    val_ids = list(map(int, json.load(jfile)))

In [351]:
# Relative sampling weights: sealion types 0-3, type 4 (the one labelled 1 by PupsIterator), background.
# PupsIterator normalizes them into a probability distribution.
class_weights = [0.25,  0.25, 0.25, 0.25, 2.0, 0.1]

In [352]:
# NOTE(review): hard-coded absolute, user-specific data directory, while the id lists
# above are read from the relative "../data/sealion/" -- confirm both point to the same data.
trainPupsGenerator = PupsIterator("/home/lowik/sealion/data/sealion/", train_ids, class_weights=class_weights)

# Validation patches are sampled the same way (same weights, shuffling and flips) from the validation ids.
valPupsGenerator = PupsIterator("/home/lowik/sealion/data/sealion/", val_ids, class_weights=class_weights)

In [354]:
# Pull a single batch from the training generator for inspection.
batch_x, batch_y = next(iter(trainPupsGenerator))

In [355]:
batch_y.shape  # label array of the batch fetched above

(32, 1, 1, 1)

In [356]:
batch_x.shape  # patch array of the batch fetched above

(32, 48, 48, 3)

In [358]:
batch_y.ravel()  # flattened labels of the batch, one value per patch

array([0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 1], dtype=int32)

In [359]:
pups_net.predict(batch_x).shape  # prediction shape on the batch, to compare with batch_y.shape

(32, 1, 1, 1)

In [360]:
# Mark every layer of the ResNet50 base as non-trainable; the Conv2D head added on top
# is not part of base_model.layers and is left untouched. Done before compile().
for layer in base_model.layers:
    layer.trainable = False

In [361]:
from keras.losses import binary_crossentropy

In [362]:
# SGD with Nesterov momentum 0.9, learning rate 0.01 and learning-rate decay 0.0005.
sgd = SGD(lr=0.01, momentum=0.9, decay=0.0005, nesterov=True)
# Binary cross-entropy on the sigmoid output, tracking accuracy.
pups_net.compile(optimizer=sgd, loss=binary_crossentropy, metrics=['accuracy'])

In [363]:
# Train for 3 epochs of 5 batches each, validating on 5 batches after every epoch.
# NOTE(review): this rebinds `h`, which earlier held base_model.layers[-2].
h = pups_net.fit_generator(trainPupsGenerator, 5, epochs=3, verbose=1, callbacks=None, validation_data=valPupsGenerator, validation_steps=5, class_weight=None, max_q_size=10, workers=1, pickle_safe=False, initial_epoch=0)

Epoch 1/3
Epoch 2/3
Epoch 3/3
