In [1]:
# %load GoodsClassifier.py

import os

from keras.applications.resnet50 import ResNet50
from keras.callbacks import ModelCheckpoint
from keras.layers import Dense, Flatten
from keras.models import Model
from keras.optimizers import SGD
from keras.preprocessing import image
from keras.applications.vgg19 import preprocess_input
from keras.preprocessing.image import ImageDataGenerator
import numpy as np
from annoy import AnnoyIndex

Using TensorFlow backend.


In [2]:
dataset_path = "/mnt/course/classifieds/"
vectors_path = "data/goods_vectors/"
features_dict_path = vectors_path + 'feature_dict.npy'

In [3]:
# модель = ResNet50 без голови з одним dense шаром для класифікації об'єктів на nb_classes
def get_model(nb_classes=100):
    feature_extractor = ResNet50(include_top=False, weights='imagenet', input_shape=(224, 224, 3))
    flat = Flatten()(feature_extractor.output)
    # можна додати кілька dense шарів:
    # d = Dense(nb_classes*2, activation='relu')(flat)
    # d = Dense(nb_classes, activation='softmax')(d)
#     d = Dense(cls, activation='softmax')(flat)
    m = Model(inputs=feature_extractor.input, outputs=flat)

    # "заморозимо" всі шари ResNet50, крім кількох останніх
    # базові ознаки згорткових шарів перших рівнів досить універсальні, тому ми не будемо міняти їх ваги
    # кількість шарів, які ми "заморожуємо" - це гіперпараметр
#     for layer in m.layers[:-12]:
#         layer.trainable = False

    # для finetuning ми використаємо звичайний SGD з малою швидкістю навчання та моментом
    m.compile(
        optimizer=SGD(lr=0.01, momentum=0.9),
        loss='categorical_crossentropy',
        metrics=['accuracy'])
    m.summary()
    return m

In [4]:
# кількість класів, підставте ваше значення
nb_classes = 844
vector_size = 2048
n_trees = 256

model = get_model(nb_classes=nb_classes)
annoy = AnnoyIndex(vector_size, metric='angular')

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_1 (InputLayer)             (None, 224, 224, 3)   0                                            
____________________________________________________________________________________________________
conv1 (Conv2D)                   (None, 112, 112, 64)  9472        input_1[0][0]                    
____________________________________________________________________________________________________
bn_conv1 (BatchNormalization)    (None, 112, 112, 64)  256         conv1[0][0]                      
____________________________________________________________________________________________________
activation_1 (Activation)        (None, 112, 112, 64)  0           bn_conv1[0][0]                   
___________________________________________________________________________________________

In [5]:
annoy_counter = 0
def get_classes_imageVecs_dict(dataset_path, image_batch_size=64):
    if not os.path.isdir(vectors_path):
        os.makedirs(vectors_path)
        labels_features = {}
        dirs = os.listdir(dataset_path)

        for dir in dirs[:-1]:
            class_path = dataset_path + dir + '/'
            files = os.listdir(class_path)
            print('Extracting vecs for class %s:' % dir)
            features = extract_feature_vectors(class_path, files, image_batch_size)
            labels_features[dir] = features

        np.save(features_dict_path, labels_features)
        annoy.build(n_trees)
        annoy.save(vectors_path + 'resnet_%d.idx' % n_trees)
        return labels_features
    return np.load(features_dict_path).item()


def extract_feature_vectors(class_path, imgPaths, batch_size=64):
    global annoy_counter
    resultVectors = list()
    i = 0
    batch = np.zeros((batch_size, 224, 224, 3), dtype=np.float)
    for imgPath in imgPaths:
        img = image.img_to_array(image.load_img(class_path + imgPath, target_size=(224, 224)))
        img = preprocess_input(img)
        batch[i] = img
        i += 1
        if i == batch_size:
            i = 0
            batchVectors = model.predict_on_batch(batch).astype(np.float16)
            for vector in batchVectors:
                annoy.add_item(annoy_counter, vector)
                resultVectors.append(vector)
                annoy_counter += 1
            print('Extracted %d images in total.' % annoy_counter)
    return np.array(resultVectors)

In [8]:
get_classes_imageVecs_dict(dataset_path)

{'488_mebel_Furniture_bedrooms_Vanities_and_Makeup_Tablesd': array([[  3.82324219e-01,   1.18164062e+00,   1.67724609e-01, ...,
           4.18945312e-01,   3.53271484e-01,   5.04394531e-01],
        [  2.27783203e-01,   1.07519531e+00,   0.00000000e+00, ...,
           0.00000000e+00,   0.00000000e+00,   5.18066406e-01],
        [  2.77343750e-01,   7.30133057e-03,   2.90222168e-02, ...,
           2.54687500e+00,   7.44018555e-02,   1.95117188e+00],
        ..., 
        [  2.59765625e-01,   7.72949219e-01,   2.79541016e-02, ...,
           2.21443176e-03,   2.50976562e-01,   1.07116699e-01],
        [  2.26318359e-01,   1.08984375e+00,   3.02001953e-01, ...,
           2.94799805e-02,   4.69970703e-02,   3.54736328e-01],
        [  5.68359375e-01,   1.52709961e-01,   1.33666992e-02, ...,
           9.77050781e-01,   9.92431641e-02,   6.66503906e-01]], dtype=float16),
 '642_spares_Corner_Lights': array([[  5.69335938e-01,   2.10937500e-01,   0.00000000e+00, ...,
           9.39331055

In [9]:
annoy.load('data/resnet_256.idx')

True

In [None]:
# при необхідності завантажити ваги:
# model.load_weights('weights_finetuned.h5')

img_height = 224
img_width = 224
batch_size = 8

# розділити датасет на тренувальний та тестовий
# у пропорції 90/10
train_dir = ''
test_dir = ''

# зробити генератор за рекомендаціями статті:
# https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html

train_gen = ImageDataGenerator(
    rescale=1. / 255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True)

train_generator = train_gen.flow_from_directory(
    train_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='binary')

model.fit_generator(
    generator=train_generator,
    # validation_data= напишіть генератор для тестових даних
    steps_per_epoch=42,
    nb_epoch=42,
    callbacks=[ModelCheckpoint('weights_finetuned.h5', save_best_only=True, monitor='val_loss')])

model.save_weights('weights_finetuned.h5')