### define environment

In [49]:
USING_GOOGLE_COLAB = False

In [50]:
if USING_GOOGLE_COLAB:
    from google.colab import drive
    drive.mount('/content/drive')

In [51]:
if USING_GOOGLE_COLAB:
    !git clone https://github.com/GrzegorzKazana/nearest-neighbours.git
    !ls drive

In [52]:
import os
import sys

proj_base_path = '/content/nearest-neighbours' if USING_GOOGLE_COLAB else './'
data_base_path = '/content/drive/My Drive/' if USING_GOOGLE_COLAB else './data'
models_base_path = '/content/drive/My Drive/' if USING_GOOGLE_COLAB else './models'
sys.path.append(os.path.join(os.getcwd(), proj_base_path))

### code starts here

In [53]:
import numpy as np

images_file_path = 'imgs_task1.npy'
labels_file_path = 'labels_task1.npy'

images = np.load(os.path.join(data_base_path, images_file_path))
labels = np.load(os.path.join(data_base_path, labels_file_path))
labels = np.squeeze(labels)

print(images.shape, labels.shape)
print(f'number of samples {len(images)}')

assert len(images) == len(labels)

n_of_samples = len(images)

(2782, 224, 224, 3) (2782, 48)
number of samples 2782


In [54]:
def data_generator(X, Y, batch_size):
    while True:
        batch_idxs = np.random.randint(0, len(X), size=batch_size)

        x = X[batch_idxs]
        y = Y[batch_idxs]
        
        yield x, y
        
data_gen = data_generator(images, labels, 16)
next(data_gen)

(array([[[[254, 254, 254],
          [254, 254, 254],
          [254, 254, 254],
          ...,
          [152, 156, 172],
          [148, 155, 170],
          [148, 157, 170]],
 
         [[254, 254, 254],
          [254, 254, 254],
          [254, 254, 254],
          ...,
          [151, 155, 169],
          [154, 158, 172],
          [152, 157, 168]],
 
         [[254, 254, 254],
          [254, 254, 254],
          [254, 254, 254],
          ...,
          [154, 157, 170],
          [158, 160, 171],
          [159, 160, 170]],
 
         ...,
 
         [[  0,   9,   4],
          [  5,  10,   8],
          [ 16,  16,  16],
          ...,
          [218, 212, 189],
          [253, 255, 233],
          [253, 253, 239]],
 
         [[  2,  16,  14],
          [ 15,  23,  22],
          [ 16,  22,  21],
          ...,
          [254, 252, 230],
          [253, 252, 232],
          [255, 253, 239]],
 
         [[  2,  19,  20],
          [  7,  19,  19],
          [  9,  20,  18],
   

### model preparation

In [55]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

INPUT_SHAPE = (224, 224, 3)
HIDDEN_SIZE = 1024
OUTPUT_SIZE = labels.shape[-1]

In [65]:
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.layers import Input, Flatten, Dense
from tensorflow.keras.models import Model

input_layer = Input(shape=INPUT_SHAPE)
model_vgg16_conv = VGG16(weights='imagenet', include_top=False)

for l in model_vgg16_conv.layers:
    l.trainable = False

output_vgg16_conv = model_vgg16_conv(input_layer)

x = Flatten()(output_vgg16_conv)
x = Dense(HIDDEN_SIZE, activation='relu')(x)
x = Dense(OUTPUT_SIZE, activation='sigmoid')(x)

model = Model(input_layer, outputs=x)

model.summary()

Model: "model_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_15 (InputLayer)        [(None, 224, 224, 3)]     0         
_________________________________________________________________
vgg16 (Model)                multiple                  14714688  
_________________________________________________________________
flatten_7 (Flatten)          (None, 25088)             0         
_________________________________________________________________
dense_14 (Dense)             (None, 1024)              25691136  
_________________________________________________________________
dense_15 (Dense)             (None, 48)                49200     
Total params: 40,455,024
Trainable params: 25,740,336
Non-trainable params: 14,714,688
_________________________________________________________________


In [66]:
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['binary_accuracy', 'binary_crossentropy', 'categorical_crossentropy']
)

### training

In [67]:
from datetime import datetime

def preprocessed_gen(X, Y, batch_size):
    gen = data_generator(X, Y, batch_size)
    while True:
        x, y = next(gen)
        yield preprocess_input(x), y

BATCH_SIZE = 32

indices = np.random.permutation(n_of_samples)
train_split_idx = int(0.8 * n_of_samples)
training_idx, test_idx = indices[:train_split_idx], indices[train_split_idx:]
training_img, test_img = images[training_idx,:], images[test_idx,:]
training_labels, test_labels = labels[training_idx,:], labels[test_idx,:]

train_data_gen = preprocessed_gen(training_img, training_labels, BATCH_SIZE)
test_data_gen = preprocessed_gen(test_img, test_labels, BATCH_SIZE)

In [None]:
STEPS_PER_EPOCH = 20
VALIDATION_STEPS = 2
EPOCHS = 100

# simple early stopping
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=16)

t = datetime.now().strftime("%Y-%m-%dT%H:%M:%S.%f")
model_path = os.path.join(models_base_path, f'm_{t}.h5')

mc = ModelCheckpoint(model_path, monitor='val_loss', mode='min', verbose=1, save_best_only=True)


history = model.fit_generator(
    train_data_gen,
    steps_per_epoch=STEPS_PER_EPOCH,
    epochs=EPOCHS,
#     callbacks=[es, mc],
    callbacks=[es],
    validation_data=test_data_gen,
    validation_steps=VALIDATION_STEPS
)
# for _ in range(1000):
#     idx = np.random.randint(len(training_img))
#     x, y = training_img[np.newaxis, idx], training_labels[np.newaxis, idx]
#     num_of_ones = len(np.argwhere(y > 0.5))
#     w = {i: 1 for i in range(OUTPUT_SIZE)}
#     w = {
#         **w,
#         **{a[0]: OUTPUT_SIZE / num_of_ones for a in np.argwhere(y > 0.5)}
#     }
    
#     model.fit(
#         x,
#         y,
#         class_weight=w
#     )

Epoch 1/100
Epoch 2/100

In [None]:
from datetime import datetime

t = datetime.timestamp(datetime.now())
models_file_name = f'm_{t}.h5'

model.save(os.path.join(models_base_path, models_file_name))

In [64]:
from tensorflow.keras.models import load_model

load_model(os.path.join(models_base_path, 'm_1572059121.544981.h5'))

<tensorflow.python.keras.engine.training.Model at 0x147b3e490>

In [78]:
x, y = next(test_data_gen)
np.argwhere(y > 0)

array([[ 0, 21],
       [ 0, 29],
       [ 0, 30],
       [ 0, 34],
       [ 0, 43],
       [ 1, 19],
       [ 2,  6],
       [ 2, 13],
       [ 2, 21],
       [ 2, 22],
       [ 2, 34],
       [ 2, 42],
       [ 3,  7],
       [ 3, 21],
       [ 3, 22],
       [ 3, 30],
       [ 3, 31],
       [ 3, 34],
       [ 4,  7],
       [ 4, 22],
       [ 4, 30],
       [ 4, 34],
       [ 5, 16],
       [ 5, 22],
       [ 5, 30],
       [ 5, 31],
       [ 5, 34],
       [ 5, 46],
       [ 6, 30],
       [ 6, 31],
       [ 6, 45],
       [ 7, 29],
       [ 7, 30],
       [ 7, 34],
       [ 7, 42],
       [ 7, 43],
       [ 7, 46],
       [ 8,  3],
       [ 8,  4],
       [ 8, 22],
       [ 8, 30],
       [ 8, 34],
       [ 9, 21],
       [ 9, 29],
       [ 9, 30],
       [ 9, 34],
       [ 9, 42],
       [ 9, 43],
       [10,  3],
       [10,  4],
       [10,  5],
       [10, 21],
       [10, 22],
       [10, 30],
       [10, 34],
       [11,  3],
       [11,  4],
       [11,  5],
       [11, 21

In [79]:
res = model.predict(x)

In [80]:
np.argwhere(res > 0.5)

array([[ 0, 22],
       [ 0, 30],
       [ 0, 34],
       [ 1, 30],
       [ 2, 22],
       [ 2, 30],
       [ 2, 34],
       [ 3, 22],
       [ 3, 30],
       [ 3, 34],
       [ 4, 22],
       [ 4, 30],
       [ 4, 34],
       [ 5, 22],
       [ 5, 30],
       [ 5, 34],
       [ 6, 30],
       [ 7, 22],
       [ 7, 30],
       [ 7, 34],
       [ 8, 22],
       [ 8, 30],
       [ 8, 34],
       [ 9, 22],
       [ 9, 30],
       [ 9, 34],
       [10, 22],
       [10, 30],
       [10, 34],
       [11, 22],
       [11, 30],
       [11, 34],
       [12, 22],
       [12, 30],
       [12, 34],
       [13, 22],
       [13, 30],
       [13, 34],
       [14, 22],
       [14, 30],
       [14, 34],
       [15, 22],
       [15, 30],
       [15, 34],
       [16, 22],
       [16, 30],
       [16, 34],
       [17, 22],
       [17, 30],
       [17, 34],
       [18, 22],
       [18, 30],
       [18, 34],
       [19, 22],
       [19, 30],
       [20, 30],
       [21, 22],
       [21, 30],
       [21, 34