### define environment

In [2]:
USING_GOOGLE_COLAB = False

In [3]:
if USING_GOOGLE_COLAB:
    from google.colab import drive
    drive.mount('/content/drive')

In [4]:
if USING_GOOGLE_COLAB:
    !git clone https://github.com/GrzegorzKazana/nearest-neighbours.git
    !ls drive

In [5]:
import os
import sys

proj_base_path = '/content/nearest-neighbours' if USING_GOOGLE_COLAB else './'
data_base_path = '/content/drive/My Drive/' if USING_GOOGLE_COLAB else './data'
models_base_path = '/content/drive/My Drive/' if USING_GOOGLE_COLAB else './models'
sys.path.append(os.path.join(os.getcwd(), proj_base_path))

### code starts here

In [22]:
import numpy as np

images_file_path = 'imgs_task1_smol.npy'
labels_file_path = 'labels_task1_smol.npy'

images = np.load(os.path.join(data_base_path, images_file_path))
labels = np.load(os.path.join(data_base_path, labels_file_path))
labels = np.squeeze(labels)

print(images.shape, labels.shape)
print(f'number of samples {len(images)}')

assert len(images) == len(labels)

n_of_samples = len(images)

(2782, 128, 128, 3) (2782, 48)
number of samples 2782


In [23]:
def data_generator(X, Y, batch_size):
    while True:
        batch_idxs = np.random.randint(0, len(X), size=batch_size)

        x = X[batch_idxs]
        y = Y[batch_idxs]
        
        yield x, y
        
data_gen = data_generator(images, labels, 16)
next(data_gen)

(array([[[[ 22,  30,  41],
          [ 50,  60,  66],
          [ 59,  67,  70],
          ...,
          [ 56,  68,  70],
          [ 46,  58,  51],
          [ 45,  57,  61]],
 
         [[ 54,  64,  60],
          [ 57,  68,  82],
          [ 55,  64,  73],
          ...,
          [ 51,  62,  69],
          [ 55,  68,  75],
          [ 51,  59,  61]],
 
         [[ 53,  62,  76],
          [ 56,  66,  73],
          [ 63,  71,  85],
          ...,
          [ 61,  72,  77],
          [ 58,  67,  71],
          [ 42,  51,  50]],
 
         ...,
 
         [[ 33,  52,  68],
          [ 40,  53,  71],
          [ 55,  66,  71],
          ...,
          [  2,  23,  54],
          [  4,  17,  45],
          [  3,  19,  41]],
 
         [[ 24,  40,  56],
          [ 34,  45,  54],
          [ 23,  44,  57],
          ...,
          [  3,  23,  48],
          [  1,  18,  39],
          [  7,  19,  45]],
 
         [[ 27,  44,  54],
          [ 24,  41,  54],
          [ 40,  54,  57],
   

### model preparation

In [10]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

INPUT_SHAPE = (128, 128, 3)
HIDDEN_SIZE = 256
OUTPUT_SIZE = labels.shape[-1]

In [24]:
from tensorflow.keras.applications.densenet import DenseNet121
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.densenet import preprocess_input
from tensorflow.keras.layers import Input, Flatten, Dense, Dropout
from tensorflow.keras.models import Model

input_layer = Input(shape=INPUT_SHAPE)
model_vgg16_conv = DenseNet121(weights='imagenet', include_top=False)

for l in model_vgg16_conv.layers:
    l.trainable = False

output_vgg16_conv = model_vgg16_conv(input_layer)

x = Flatten()(output_vgg16_conv)
x = Dense(HIDDEN_SIZE, activation='relu')(x)
x = Dropout(0.25)(x)
x = Dense(OUTPUT_SIZE, activation='sigmoid')(x)

model = Model(input_layer, outputs=x)

model.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         [(None, 128, 128, 3)]     0         
_________________________________________________________________
densenet121 (Model)          multiple                  7037504   
_________________________________________________________________
flatten_1 (Flatten)          (None, 16384)             0         
_________________________________________________________________
dense_2 (Dense)              (None, 256)               4194560   
_________________________________________________________________
dropout_1 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 48)                12336     
Total params: 11,244,400
Trainable params: 4,206,896
Non-trainable params: 7,037,504
________________________________________

In [25]:
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['binary_accuracy', 'binary_crossentropy', 'categorical_crossentropy']
)

### training

In [26]:
from datetime import datetime

def preprocessed_gen(X, Y, batch_size):
    gen = data_generator(X, Y, batch_size)
    while True:
        x, y = next(gen)
        yield preprocess_input(x), y

BATCH_SIZE = 32

indices = np.random.permutation(n_of_samples)
train_split_idx = int(0.8 * n_of_samples)
training_idx, test_idx = indices[:train_split_idx], indices[train_split_idx:]
training_img, test_img = images[training_idx,:], images[test_idx,:]
training_labels, test_labels = labels[training_idx,:], labels[test_idx,:]
# training_img, test_img = images, images
# training_labels, test_labels = labels, labels

train_data_gen = preprocessed_gen(training_img, training_labels, BATCH_SIZE)
test_data_gen = preprocessed_gen(test_img, test_labels, BATCH_SIZE)

In [None]:
STEPS_PER_EPOCH = 20
VALIDATION_STEPS = 2
EPOCHS = 10

# simple early stopping
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=16)

t = datetime.now().strftime("%Y-%m-%dT%H:%M:%S.%f")
model_path = os.path.join(models_base_path, f'm_{t}.h5')

mc = ModelCheckpoint(model_path, monitor='val_loss', mode='min', verbose=1, save_best_only=True)


history = model.fit_generator(
    train_data_gen,
    steps_per_epoch=STEPS_PER_EPOCH,
    epochs=EPOCHS,
#     callbacks=[es, mc],
#     callbacks=[es],
    validation_data=test_data_gen,
    validation_steps=VALIDATION_STEPS
)
# for _ in range(1000):
#     idx = np.random.randint(len(training_img))
#     x, y = training_img[np.newaxis, idx], training_labels[np.newaxis, idx]
#     num_of_ones = len(np.argwhere(y > 0.5))
#     w = {i: 1 for i in range(OUTPUT_SIZE)}
#     w = {
#         **w,
#         **{a[0]: OUTPUT_SIZE / num_of_ones for a in np.argwhere(y > 0.5)}
#     }
    
#     model.fit(
#         x,
#         y,
#         class_weight=w
#     )

Epoch 1/10
 4/20 [=====>........................] - ETA: 1:09 - loss: 0.1495 - binary_accuracy: 0.9468 - binary_crossentropy: 0.1495 - categorical_crossentropy: 13.3776

In [28]:
from datetime import datetime

t = datetime.timestamp(datetime.now())
models_file_name = f'm_{t}.h5'

model.save(os.path.join(models_base_path, models_file_name))

In [None]:
from tensorflow.keras.models import load_model

load_model(os.path.join(models_base_path, 'm_1572059121.544981.h5'))

In [112]:
x, y = next(test_data_gen)
np.argwhere(y > 0)

array([[ 0,  6],
       [ 0, 13],
       [ 0, 21],
       [ 0, 22],
       [ 0, 24],
       [ 0, 30],
       [ 0, 34],
       [ 1,  6],
       [ 1, 16],
       [ 1, 22],
       [ 1, 34],
       [ 1, 39],
       [ 2,  0],
       [ 2, 21],
       [ 2, 22],
       [ 2, 30],
       [ 2, 34],
       [ 2, 37],
       [ 2, 42],
       [ 3, 21],
       [ 3, 22],
       [ 3, 30],
       [ 3, 34],
       [ 3, 46],
       [ 4,  0],
       [ 4, 22],
       [ 4, 30],
       [ 4, 34],
       [ 5, 19],
       [ 5, 30],
       [ 6,  6],
       [ 6, 13],
       [ 6, 22],
       [ 6, 30],
       [ 6, 31],
       [ 6, 34],
       [ 6, 39],
       [ 7,  3],
       [ 7,  4],
       [ 7,  5],
       [ 7, 22],
       [ 7, 27],
       [ 7, 30],
       [ 7, 34],
       [ 7, 46],
       [ 8, 10],
       [ 8, 22],
       [ 8, 30],
       [ 8, 34],
       [ 8, 39],
       [ 9, 12],
       [ 9, 21],
       [ 9, 22],
       [ 9, 30],
       [ 9, 34],
       [ 9, 46],
       [10,  3],
       [10,  4],
       [10, 22

In [113]:
res = model.predict(x)

In [115]:
np.argwhere(res > 0.5)

array([], shape=(0, 2), dtype=int64)