### define environment

In [96]:
USING_GOOGLE_COLAB = False

In [97]:
if USING_GOOGLE_COLAB:
    from google.colab import drive
    drive.mount('/content/drive')

In [98]:
if USING_GOOGLE_COLAB:
    !git clone https://github.com/GrzegorzKazana/nearest-neighbours.git
    !ls drive

In [99]:
import os
import sys

proj_base_path = '/content/nearest-neighbours' if USING_GOOGLE_COLAB else './'
data_base_path = '/content/drive/My Drive/' if USING_GOOGLE_COLAB else './data'
models_base_path = '/content/drive/My Drive/' if USING_GOOGLE_COLAB else './models'
sys.path.append(os.path.join(os.getcwd(), proj_base_path))

### code starts here

In [124]:
import numpy as np

images_file_path = 'imgs_task1.npy'
labels_file_path = 'labels_task1.npy'

images = np.load(os.path.join(data_base_path, images_file_path))
labels = np.load(os.path.join(data_base_path, labels_file_path))
labels = np.squeeze(labels)

print(images.shape, labels.shape)
print(f'number of samples {len(images)}')

assert len(images) == len(labels)

n_of_samples = len(images)

(2782, 224, 224, 3) (2782, 48)
number of samples 2782


In [125]:
def data_generator(X, Y, batch_size):
    while True:
        batch_idxs = np.random.randint(0, len(X), size=batch_size)

        x = X[batch_idxs]
        y = Y[batch_idxs]
        
        yield x, y
        
data_gen = data_generator(images, labels, 16)
next(data_gen)

(array([[[[163, 176, 182],
          [165, 177, 185],
          [168, 178, 189],
          ...,
          [125, 139, 157],
          [123, 137, 155],
          [122, 136, 154]],
 
         [[168, 180, 184],
          [169, 181, 187],
          [170, 181, 189],
          ...,
          [125, 140, 156],
          [124, 139, 155],
          [123, 138, 154]],
 
         [[152, 168, 177],
          [158, 173, 184],
          [160, 174, 188],
          ...,
          [126, 141, 156],
          [124, 140, 154],
          [123, 138, 153]],
 
         ...,
 
         [[ 89, 115, 132],
          [ 84, 109, 129],
          [ 88, 112, 134],
          ...,
          [ 24,  29,  67],
          [ 19,  31,  64],
          [ 14,  31,  60]],
 
         [[ 94, 119, 137],
          [ 80, 105, 125],
          [ 76,  99, 122],
          ...,
          [ 21,  36,  72],
          [ 21,  33,  66],
          [ 22,  29,  60]],
 
         [[ 31,  52,  93],
          [ 31,  43,  82],
          [ 22,  36,  86],
   

### model preparation

In [126]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

INPUT_SHAPE = (224, 224, 3)
HIDDEN_SIZE = 256
OUTPUT_SIZE = labels.shape[-1]

In [117]:
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.layers import Input, Flatten, Dense, Dropout
from tensorflow.keras.models import Model

input_layer = Input(shape=INPUT_SHAPE)
model_vgg16_conv = VGG16(weights='imagenet', include_top=False)

for l in model_vgg16_conv.layers:
    l.trainable = False

output_vgg16_conv = model_vgg16_conv(input_layer)

x = Flatten()(output_vgg16_conv)
x = Dense(HIDDEN_SIZE, activation='relu')(x)
x = Dropout(0.25)(x)
x = Dense(OUTPUT_SIZE, activation='sigmoid')(x)

model = Model(input_layer, outputs=x)

model.summary()

Model: "model_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_19 (InputLayer)        [(None, 224, 224, 3)]     0         
_________________________________________________________________
vgg16 (Model)                multiple                  14714688  
_________________________________________________________________
flatten_9 (Flatten)          (None, 25088)             0         
_________________________________________________________________
dense_18 (Dense)             (None, 256)               6422784   
_________________________________________________________________
dropout_1 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_19 (Dense)             (None, 48)                12336     
Total params: 21,149,808
Trainable params: 6,435,120
Non-trainable params: 14,714,688
_______________________________________

In [118]:
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['binary_accuracy', 'binary_crossentropy', 'categorical_crossentropy']
)

### training

In [132]:
from datetime import datetime
from tensorflow.keras.preprocessing.image import ImageDataGenerator

def preprocessed_gen(X, Y, batch_size):
    gen = data_generator(X, Y, batch_size)
    while True:
        x, y = next(gen)
        yield preprocess_input(x), y

augmented_data_gen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    channel_shift_range=20,
    horizontal_flip=True,
    fill_mode='nearest'
)
        
BATCH_SIZE = 32

indices = np.random.permutation(n_of_samples)
train_split_idx = int(0.8 * n_of_samples)
training_idx, test_idx = indices[:train_split_idx], indices[train_split_idx:]
training_img, test_img = images[training_idx,:], images[test_idx,:]
training_labels, test_labels = labels[training_idx,:], labels[test_idx,:]
# training_img, test_img = images, images
# training_labels, test_labels = labels, labels

# train_data_gen = preprocessed_gen(training_img, training_labels, BATCH_SIZE)
# test_data_gen = preprocessed_gen(test_img, test_labels, BATCH_SIZE)
train_data_gen = augmented_data_gen.flow(training_img, training_labels, batch_size=BATCH_SIZE)
train_data_gen = augmented_data_gen.flow(test_img, test_labels, batch_size=BATCH_SIZE)

In [None]:
STEPS_PER_EPOCH = 20
VALIDATION_STEPS = 2
EPOCHS = 10

# simple early stopping
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=16)

t = datetime.now().strftime("%Y-%m-%dT%H:%M:%S.%f")
model_path = os.path.join(models_base_path, f'm_{t}.h5')

mc = ModelCheckpoint(model_path, monitor='val_loss', mode='min', verbose=1, save_best_only=True)


history = model.fit_generator(
    train_data_gen,
    steps_per_epoch=STEPS_PER_EPOCH,
    epochs=EPOCHS,
    validation_data=test_data_gen,
    validation_steps=VALIDATION_STEPS
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10

In [129]:
from datetime import datetime

t = datetime.timestamp(datetime.now())
models_file_name = f'm_{t}_old_good_60.h5'

model.save(os.path.join(models_base_path, models_file_name))

In [123]:
from tensorflow.keras.models import load_model

model = load_model(os.path.join(models_base_path, 'm_1572080284.136123_50_ep_0_44.h5'))

In [112]:
x, y = next(test_data_gen)
np.argwhere(y > 0)

array([[ 0,  6],
       [ 0, 13],
       [ 0, 21],
       [ 0, 22],
       [ 0, 24],
       [ 0, 30],
       [ 0, 34],
       [ 1,  6],
       [ 1, 16],
       [ 1, 22],
       [ 1, 34],
       [ 1, 39],
       [ 2,  0],
       [ 2, 21],
       [ 2, 22],
       [ 2, 30],
       [ 2, 34],
       [ 2, 37],
       [ 2, 42],
       [ 3, 21],
       [ 3, 22],
       [ 3, 30],
       [ 3, 34],
       [ 3, 46],
       [ 4,  0],
       [ 4, 22],
       [ 4, 30],
       [ 4, 34],
       [ 5, 19],
       [ 5, 30],
       [ 6,  6],
       [ 6, 13],
       [ 6, 22],
       [ 6, 30],
       [ 6, 31],
       [ 6, 34],
       [ 6, 39],
       [ 7,  3],
       [ 7,  4],
       [ 7,  5],
       [ 7, 22],
       [ 7, 27],
       [ 7, 30],
       [ 7, 34],
       [ 7, 46],
       [ 8, 10],
       [ 8, 22],
       [ 8, 30],
       [ 8, 34],
       [ 8, 39],
       [ 9, 12],
       [ 9, 21],
       [ 9, 22],
       [ 9, 30],
       [ 9, 34],
       [ 9, 46],
       [10,  3],
       [10,  4],
       [10, 22

In [113]:
res = model.predict(x)

In [115]:
np.argwhere(res > 0.5)

array([], shape=(0, 2), dtype=int64)