In [27]:
# %load GoodsClassifier.py

import os

import numpy as np
from annoy import AnnoyIndex
from keras.applications.resnet50 import ResNet50
from keras.applications.vgg19 import preprocess_input
from keras.callbacks import ModelCheckpoint
from keras.layers import Dense, Flatten, Input
from keras.models import Model
from keras.optimizers import SGD
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator

In [28]:
# Locations of the Annoy index file and of the model weights file.
features_path = "data/goods_features/"
index_path = "{}resnet_256.idx".format(features_path)
pretrained_features_path = "{}finetuned_weights.h5".format(features_path)

# Size the images are resized to by the generators, and the mini-batch size.
img_height = img_width = 224
batch_size = 8

# Number of classes -- substitute your own value.
nb_classes = 844
epochs = 42

# The dataset has to be split into a training part and a validation part
# in a 90/10 proportion.
train_dir = '../datasets/classifieds/train'
validation_dir = '../datasets/classifieds/validation'

In [29]:
# Model = headless ResNet50 followed by a dense classifier over nb_classes categories.
def get_model(nb_classes=100, fine_tune=False):
    """Build and compile the ResNet50-based classifier.

    The network is an ImageNet-initialised ResNet50 without its top, followed
    by Flatten -> Dense(nb_classes * 2, relu) -> Dense(nb_classes, softmax).

    Args:
        nb_classes: number of output classes.
        fine_tune: when True, weights are loaded from
            ``pretrained_features_path``, every layer except the last 12 is
            frozen and the model is compiled with SGD (lr=1e-4, momentum=0.9).
            When False, every layer except the last 3 (the classification
            head) is frozen, the model is compiled with RMSprop and its
            summary is printed.

    Returns:
        The compiled Keras ``Model``.
    """
    feature_extractor = ResNet50(include_top=False, weights='imagenet', input_shape=(224, 224, 3))
    flat = Flatten()(feature_extractor.output)
    # More dense layers can be stacked here if needed.
    d = Dense(nb_classes*2, activation='relu')(flat)
    # Exactly one softmax output layer: the original stacked a second softmax
    # layer on top of this one, feeding probabilities into another softmax.
    # NOTE: weight files saved with that extra layer will not load into this model.
    d = Dense(nb_classes, activation='softmax')(d)
    model = Model(inputs=feature_extractor.input, outputs=d)

    if fine_tune:
        model.load_weights(pretrained_features_path)

        # Freeze every layer except the last few. The features learned by the
        # early convolutional layers are fairly universal, so their weights
        # are left untouched. The number of frozen layers is a hyperparameter.
        for layer in model.layers[:-12]:
            layer.trainable = False
        # Fine-tuning uses plain SGD with a small learning rate and momentum.
        model.compile(
            optimizer=SGD(lr=1e-4, momentum=0.9),
            loss='categorical_crossentropy',
            metrics=['accuracy'])
    else:
        # Only the classification head (Flatten + two Dense layers) stays trainable.
        for layer in model.layers[:-3]:
            layer.trainable = False
        model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
        model.summary()
    return model

In [30]:
def train_head_model(nb_classes):
    """Train only the classification head on pre-extracted ResNet50 features.

    Every training and validation image is passed through a headless ResNet50
    once. The head layers of the model returned by ``get_model`` are then
    trained on those cached features, and the weights of the full model are
    written to ``pretrained_features_path``.

    Args:
        nb_classes: number of output classes.
    """
    feature_extractor = ResNet50(include_top=False, weights='imagenet', input_shape=(224, 224, 3))

    features_train, labels_train = _extract_bottleneck_features(
        feature_extractor, train_dir, nb_classes)
    features_validation, labels_validation = _extract_bottleneck_features(
        feature_extractor, validation_dir, nb_classes)

    model = get_model(nb_classes)

    # The full model expects images, not extracted features, so the head is
    # trained through a second model that reuses the very same layer objects.
    # Everything in `model` after the ResNet50 layers is the head; the weights
    # are shared, so training `head_model` updates `model` as well.
    head_layers = model.layers[len(feature_extractor.layers):]
    head_input = Input(shape=features_train.shape[1:])
    head_output = head_input
    for layer in head_layers:
        head_output = layer(head_output)
    head_model = Model(inputs=head_input, outputs=head_output)
    head_model.compile(optimizer='rmsprop',
                       loss='categorical_crossentropy',
                       metrics=['accuracy'])

    head_model.fit(features_train, labels_train,
          epochs=epochs,
          batch_size=batch_size,
          validation_data=(features_validation, labels_validation))

    # Save the full model so that get_model(fine_tune=True) can load the file.
    model.save_weights(pretrained_features_path)
    print("Pretrained features have been saved successfully.")


def _extract_bottleneck_features(feature_extractor, directory, nb_classes):
    """Return (features, one-hot labels) for all images under ``directory``."""
    datagen = ImageDataGenerator(rescale=1./255)
    generator = datagen.flow_from_directory(
        directory,
        target_size=(img_height, img_width),
        batch_size=batch_size,
        class_mode=None,
        shuffle=False)  # keep file order so features line up with generator.classes

    # Ceiling division: the last, possibly partial, batch must be included,
    # otherwise there would be fewer feature rows than labels.
    steps = -(-generator.samples // batch_size)
    features = feature_extractor.predict_generator(generator, steps)

    # categorical_crossentropy needs one-hot targets, not integer class indices.
    labels = np.eye(nb_classes, dtype='float32')[generator.classes]
    return features, labels

In [31]:
def start_finetuning(nb_classes):
    """Fine-tune the model built by ``get_model(nb_classes, fine_tune=True)``.

    Training images are augmented (shear, zoom, horizontal flip); validation
    images are only rescaled. The weights with the lowest validation loss are
    checkpointed to ``pretrained_features_path`` and restored into the model
    before it is returned.

    Args:
        nb_classes: number of output classes.

    Returns:
        The fine-tuned Keras model carrying the best checkpointed weights.
    """
    model = get_model(nb_classes, fine_tune=True)

    train_gen = ImageDataGenerator(
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True)

    validation_gen = ImageDataGenerator(rescale=1. / 255)

    train_generator = train_gen.flow_from_directory(
        train_dir,
        target_size=(img_height, img_width),
        batch_size=batch_size,
        class_mode='categorical')

    validation_generator = validation_gen.flow_from_directory(
        validation_dir,
        target_size=(img_height, img_width),
        batch_size=batch_size,
        class_mode='categorical')

    # Derive the step counts from the generators themselves; ceiling division
    # makes each epoch cover every image once, including the last partial batch.
    steps_per_epoch = -(-train_generator.samples // batch_size)
    validation_steps = -(-validation_generator.samples // batch_size)

    model.fit_generator(
        generator=train_generator,
        validation_data=validation_generator,
        validation_steps=validation_steps,
        steps_per_epoch=steps_per_epoch,
        epochs=epochs,
        callbacks=[ModelCheckpoint(
            pretrained_features_path,
            save_best_only=True,
            save_weights_only=True,
            monitor='val_loss')])

    # Do not overwrite the best checkpoint with the last-epoch weights;
    # load the best weights back so the returned model matches the file.
    model.load_weights(pretrained_features_path)

    return model

In [32]:
def startTraining(nb_classes):
    """Run the training pipeline and return the fine-tuned model.

    The head is trained first when no weights file exists at
    ``pretrained_features_path``; fine-tuning is always run afterwards.
    """
    weights_missing = not os.path.exists(pretrained_features_path)
    if weights_missing:
        train_head_model(nb_classes)
    return start_finetuning(nb_classes)

In [33]:
# Presumably the number of trees for building the Annoy index -- it is not
# used in the visible code; TODO confirm.
n_trees = 256
# Dimensionality of the vectors stored in the Annoy index.
vector_size = 2048

In [None]:
def predict_n_neighbours(targetImagePath, topn=5):
    annoy = AnnoyIndex(vector_size, metric='angular')
    annoy.load(index_path)
    