In [7]:
import dataiku
from dataiku import pandasutils as pdu
import pandas as pd
%config Application.log_level='ERROR'

In [8]:
# Register the 'deeplearning-image-v2' plugin with this notebook session.
# NOTE(review): presumably this is what makes the bare `recipe`, `config`,
# `utils_objects` and `dku_deeplearning_image` imports in later cells
# resolvable — confirm against the Dataiku API docs.
dataiku.use_plugin_libs('deeplearning-image-v2')

In [9]:
# Identifiers of the Dataiku objects this notebook reads from / writes to.
# They are only consumed by the helper functions defined in later cells.

# Folder holding the images used for retraining (see get_retrain_input_output).
RETRAIN_IMAGES_FOLDER_ID = 'Images_for_retraining'
# Folder holding the images to score (see get_score_input_output and
# get_prediction_test).
IMAGES_TO_CLASSIFY_FOLDER_ID = 'images_to_classify'
# Dataset providing the filename / label columns for retraining.
LABELS_DATASET_ID = 'labels'
# Folder holding the model that both the retrain and the score flows load.
RESNET_MODEL_FOLDER_ID = 'resnet_model'
# Output folder passed to the retrain recipe.
OUTPUT_RETRAINED_MODEL_ID = 'retrained_model'
# Output location for the scoring results (see get_score_input_output).
OUTPUT_SCORED_DATASET_ID = 'classified_images'

# RETRAIN RECIPE

In [10]:
import dku_deeplearning_image.utils as utils
import dku_deeplearning_image.constants as constants

from recipe import RetrainRecipe
from config import RetrainConfig
from utils_objects import DkuModel
from utils_objects import DkuFileManager

# Parameters handed to RetrainConfig; one key per line so that individual
# settings are easy to spot and diff.
RETRAIN_RECIPE_CONFIG = {
    'random_seed': 1338,
    'train_ratio': 0.9,
    'gpu_usage': 'all',
    'gpu_memory': 'all',
    'layer_to_retrain': 'last',
    'layer_to_retrain_n': 2,
    'model_dropout': 0.5,
    'model_reg': None,
    'model_optimizer': 'adam',
    'model_learning_rate': 0.001,
    'batch_size': 10,
    'nb_epochs': 5,
    'nb_steps_per_epoch': 5,
    'nb_validation_steps': 5,
    'model_custom_params_opti': [],
    'n_augmentation': 2,
    'model_custom_params_data_augmentation': [],
    'data_augmentation': False,
    'tensorboard': True,
    'should_use_gpu': False,
    'model_pooling': 'avg',
    'image_width': 197,
    'image_height': 197,
    'col_filename': 'path',
    'col_label': 'label',
}

def get_retrain_recipe_config():
    """Return the module-level retraining configuration dict.

    The very same dict object is returned on every call (no copy is made),
    so mutations by a caller are visible to every other user of the config.
    """
    return RETRAIN_RECIPE_CONFIG


def get_retrain_input_output():
    """Build the Dataiku handles used by the retraining flow.

    Returns:
        tuple: ``(image_folder, label_dataset, model_folder, output_folder)``
        where the label entry is a ``dataiku.Dataset`` and the three others
        are ``dataiku.Folder`` objects.
    """
    # Instantiated exactly as before; the instance itself is never used here.
    # NOTE(review): looks removable if DkuFileManager() has no side effects — confirm.
    _unused_manager = DkuFileManager()
    images = dataiku.Folder(RETRAIN_IMAGES_FOLDER_ID)
    base_model = dataiku.Folder(RESNET_MODEL_FOLDER_ID)
    labels = dataiku.Dataset(LABELS_DATASET_ID)
    retrained_out = dataiku.Folder(OUTPUT_RETRAINED_MODEL_ID)
    return images, labels, base_model, retrained_out


def format_label_df(label_dataset, col_filename, col_label):
    """Load the label dataset and keep only the renamed filename/label columns.

    Args:
        label_dataset: object exposing ``get_dataframe()``.
        col_filename: name of the source column renamed to ``constants.FILENAME``.
        col_label: name of the source column renamed to ``constants.LABEL``.

    Returns:
        The dataframe restricted to the two renamed columns.
    """
    column_map = {col_filename: constants.FILENAME, col_label: constants.LABEL}
    raw_df = label_dataset.get_dataframe()
    renamed_df = raw_df.rename(columns=column_map)
    # Select by the *target* names, dropping every other column.
    kept_columns = list(column_map.values())
    return renamed_df[kept_columns]


def save_output_model(output_folder, model):
    """Copy ``model`` with ``output_folder`` as its folder, then save the copy.

    Args:
        output_folder: passed as ``folder=`` to ``model.deepcopy``.
        model: object exposing ``deepcopy(folder=...)`` whose result exposes
            ``save_to_folder()``.
    """
    model.deepcopy(folder=output_folder).save_to_folder()


def get_retrain_recipe():
    """Build a ``RetrainRecipe`` from the notebook's retraining configuration.

    Returns:
        RetrainRecipe: constructed from ``RetrainConfig(get_retrain_recipe_config())``.
    """
    return RetrainRecipe(RetrainConfig(get_retrain_recipe_config()))

    
@utils.log_func(txt='recipe')
def run_retrain():
    """Run the retraining recipe end to end and return its result.

    The label dataframe is prepared with the column names taken from the
    retrain config, then handed to ``RetrainRecipe.compute`` together with
    the image, model and output folders.

    Returns:
        Whatever ``RetrainRecipe.compute`` returns. Nothing is saved here:
        the ``save_output_model`` call below is commented out.
    """
    retrain_config = RetrainConfig(get_retrain_recipe_config())
    images, labels, base_model, retrained_out = get_retrain_input_output()
    labels_df = format_label_df(
        labels,
        retrain_config.col_filename,
        retrain_config.col_label,
    )
    recipe = get_retrain_recipe()
    return recipe.compute(images, base_model, labels_df, retrained_out)
    # save_output_model(output_folder, new_model)


# SCORE RECIPE

In [11]:
import pandas as pd
import dku_deeplearning_image.utils as utils

from recipe import ScoreRecipe
from config import ScoreConfig
from utils_objects import DkuFileManager

# Parameters handed to ScoreConfig; 'max_nb_labels' and 'min_threshold' are
# also read directly by get_prediction_test.
SCORING_RECIPE_CONFIG = {
    'max_nb_labels': 2,
    'min_threshold': 0.5,
    'gpu_usage': 'all',
    'gpu_memory': 'all',
    'should_use_gpu': False,
}

def get_score_recipe_config():
    """Return the module-level scoring configuration dict (same object, no copy)."""
    return SCORING_RECIPE_CONFIG

def get_score_input_output():
    """Build the Dataiku handles used by the scoring flow.

    Returns:
        tuple: ``(image_folder, model_folder, output_dataset)`` where the two
        first entries are ``dataiku.Folder`` objects and ``output_dataset`` is
        a ``dataiku.Dataset``.
    """
    # Instantiated as before; the instance itself is never used here.
    # NOTE(review): looks removable if DkuFileManager() has no side effects — confirm.
    file_manager = DkuFileManager()
    image_folder = dataiku.Folder(IMAGES_TO_CLASSIFY_FOLDER_ID)
    model_folder = dataiku.Folder(RESNET_MODEL_FOLDER_ID)
    # The output must be a Dataset, not a Folder: write_output_dataset() calls
    # write_with_schema() on it, which is a Dataset method.
    output_dataset = dataiku.Dataset(OUTPUT_SCORED_DATASET_ID)
    return image_folder, model_folder, output_dataset


@utils.log_func(txt='output dataset writing')
def write_output_dataset(output_dataset, image_folder, classification):
    """Write the scoring results for ``image_folder`` to ``output_dataset``.

    Args:
        output_dataset: object exposing ``write_with_schema(dataframe)``.
        image_folder: object exposing ``list_paths_in_partition()``.
        classification: scoring result forwarded unchanged to
            ``utils.build_prediction_output_df``.
    """
    paths = image_folder.list_paths_in_partition()
    prediction_rows = utils.build_prediction_output_df(paths, classification)
    prediction_df = pd.DataFrame(prediction_rows)
    output_dataset.write_with_schema(prediction_df)


def get_score_recipe():
    """Build a ``ScoreRecipe`` from the notebook's scoring configuration.

    Returns:
        ScoreRecipe: constructed from ``ScoreConfig(get_score_recipe_config())``.
    """
    return ScoreRecipe(ScoreConfig(get_score_recipe_config()))


@utils.log_func(txt='recipe')
def run():
    """Run the scoring recipe on the images-to-classify folder.

    Returns:
        Whatever ``ScoreRecipe.compute(image_folder, model_folder)`` returns.
        Nothing is written out here: the ``write_output_dataset`` call below
        is commented out.
    """
    # Built as in the original although it is not used below;
    # get_score_recipe() constructs its own ScoreConfig.
    score_config = ScoreConfig(get_score_recipe_config())

    # The output handle is created but not consumed while writing is disabled.
    images, base_model, scored_out = get_score_input_output()
    scorer = get_score_recipe()

    return scorer.compute(images, base_model)

    # write_output_dataset(output_dataset, image_folder, classification)

In [12]:
def get_prediction_test(dku_mdl, shape=False):
    """Score the images-to-classify folder with ``dku_mdl``.

    Args:
        dku_mdl: model wrapper exposing ``score_image_folder`` and, when
            ``shape`` is truthy, an ``application`` attribute.
        shape: when truthy, ``dku_mdl.application.input_shape`` is overwritten
            with ``(197, 197, 3)`` before scoring (this mutates ``dku_mdl``).

    Returns:
        The value returned by ``dku_mdl.score_image_folder``.
    """
    if shape:
        dku_mdl.application.input_shape = (197, 197, 3)
    score = dku_mdl.score_image_folder
    target_folder = dataiku.Folder(IMAGES_TO_CLASSIFY_FOLDER_ID)
    label_limit = SCORING_RECIPE_CONFIG['max_nb_labels']
    threshold = SCORING_RECIPE_CONFIG['min_threshold']
    return score(
        images_folder=target_folder,
        limit=label_limit,
        min_threshold=threshold,
    )

In [13]:
# Manual version of the first steps of run_retrain(): build the retrain config,
# open the inputs/outputs and prepare the label dataframe.
# These names are notebook globals; `config`, `image_folder` and `model_folder`
# are rebound later by the scoring cell.
recipe_config = get_retrain_recipe_config()
config = RetrainConfig(recipe_config)
image_folder, label_dataset, model_folder, output_folder = get_retrain_input_output()
label_df = format_label_df(label_dataset, config.col_filename, config.col_label)

# Wrap the model folder, attach the labels, then load it in RETRAINING mode.
# `retrained_dku_model` is reused by the summary / prediction / weights cells.
retrained_dku_model = DkuModel(model_folder)
retrained_dku_model.label_df = label_df
retrained_dku_model.load_model(config, constants.RETRAINING)

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:CPU:0',)


  tensor_proto.tensor_content = nparray.tostring()
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_nam

  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attrib

  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attrib

  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)


x :  Tensor("dropout_1/cond/Identity:0", shape=(None, None), dtype=float32)
predictions :  Tensor("predictions/Softmax:0", shape=(None, 2), dtype=float32)
predictions weights :  [array([[ 0.00454819, -0.00132101],
       [ 0.00103635,  0.00424267],
       [ 0.00748046,  0.00059973],
       ...,
       [-0.00228898, -0.00658472],
       [-0.00161955,  0.00310066],
       [-0.00400439, -0.00221714]], dtype=float32), array([0., 0.], dtype=float32)]


  self.model = Model(input=base_model.input, output=predictions)


In [15]:
# Display the layer-by-layer summary of the model loaded in RETRAINING mode.
retrained_dku_model.model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 197, 197, 3)  0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 203, 203, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 99, 99, 64)   9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, 99, 99, 64)   256         conv1[0][0]                      
____________________________________________________________________________________________

In [10]:
# Scoring counterpart of the retraining load cell.
# NOTE(review): this rebinds the globals `recipe_config`, `config`,
# `image_folder` and `model_folder` set by the retraining cell, and the saved
# execution counts are out of order — re-check with Restart & Run All.
recipe_config = get_score_recipe_config()
config = ScoreConfig(recipe_config)
image_folder, model_folder, output_dataset = get_score_input_output()

# Same model folder as the retraining cell, loaded in SCORING mode this time.
scoring_dku_model = DkuModel(model_folder)
scoring_dku_model.load_model(config, constants.SCORING)

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:CPU:0',)


  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attrib

  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attrib

  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attrib

  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attrib

In [14]:
# Score the images-to-classify folder with the RETRAINING-mode model, forcing
# its input shape to (197, 197, 3).
# NOTE(review): the saved output shows {"tiger": 1.0} for every listed image —
# worth checking against the SCORING-mode model's predictions.
get_prediction_test(retrained_dku_model, shape=True)

model.get_config() :  {'name': 'model_1', 'layers': [{'name': 'input_1', 'class_name': 'InputLayer', 'config': {'batch_input_shape': (None, 197, 197, 3), 'dtype': 'float32', 'sparse': False, 'name': 'input_1'}, 'inbound_nodes': []}, {'name': 'conv1_pad', 'class_name': 'ZeroPadding2D', 'config': {'name': 'conv1_pad', 'trainable': True, 'dtype': 'float32', 'padding': ((3, 3), (3, 3)), 'data_format': 'channels_last'}, 'inbound_nodes': [[['input_1', 0, 0, {}]]]}, {'name': 'conv1', 'class_name': 'Conv2D', 'config': {'name': 'conv1', 'trainable': True, 'dtype': 'float32', 'filters': 64, 'kernel_size': (7, 7), 'strides': (2, 2), 'padding': 'valid', 'data_format': 'channels_last', 'dilation_rate': (1, 1), 'activation': 'linear', 'use_bias': True, 'kernel_initializer': {'class_name': 'VarianceScaling', 'config': {'scale': 2.0, 'mode': 'fan_in', 'distribution': 'normal', 'seed': None}}, 'bias_initializer': {'class_name': 'Zeros', 'config': {}}, 'kernel_regularizer': None, 'bias_regularizer': Non

{'prediction': ['{"tiger": 1.0}',
  '{"tiger": 1.0}',
  '{"tiger": 1.0}',
  '{"tiger": 1.0}',
  '{"tiger": 1.0}',
  '{"tiger": 1.0}',
  '{"tiger": 1.0}',
  '{"tiger": 1.0}',
  '{"tiger": 1.0}',
  '{"tiger": 1.0}',
  '{"tiger": 1.0}',
  '{"tiger": 1.0}',
  '{"tiger": 1.0}',
  '{"tiger": 1.0}',
  '{"tiger": 1.0}',
  '{"tiger": 1.0}',
  '{"tiger": 1.0}',
  '{"tiger": 1.0}',
  '{"tiger": 1.0}',
  '{"tiger": 1.0}',
  '{"tiger": 1.0}',
  '{"tiger": 1.0}',
  '{"tiger": 1.0}',
  '{"tiger": 1.0}',
  '{"tiger": 1.0}',
  '{"tiger": 1.0}',
  '{"tiger": 1.0}',
  '{"tiger": 1.0}',
  '{"tiger": 1.0}',
  '{"tiger": 1.0}',
  '{"tiger": 1.0}',
  '{"tiger": 1.0}',
  '{"tiger": 1.0}',
  '{"tiger": 1.0}',
  '{"tiger": 1.0}',
  '{"tiger": 1.0}',
  '{"tiger": 1.0}',
  '{"tiger": 1.0}',
  '{"tiger": 1.0}',
  '{"tiger": 1.0}',
  '{"tiger": 1.0}',
  '{"tiger": 1.0}',
  '{"tiger": 1.0}',
  '{"tiger": 1.0}',
  '{"tiger": 1.0}',
  '{"tiger": 1.0}',
  '{"tiger": 1.0}',
  '{"tiger": 1.0}',
  '{"tiger": 1.0}',
  '{"t

In [14]:
# Score the same folder with the SCORING-mode model, leaving its input shape
# untouched (the saved output reports a 224x224x3 input layer).
get_prediction_test(scoring_dku_model, shape=False)

model.get_config() :  {'name': 'resnet50', 'layers': [{'name': 'input_2', 'class_name': 'InputLayer', 'config': {'batch_input_shape': (None, 224, 224, 3), 'dtype': 'float32', 'sparse': False, 'name': 'input_2'}, 'inbound_nodes': []}, {'name': 'conv1_pad', 'class_name': 'ZeroPadding2D', 'config': {'name': 'conv1_pad', 'trainable': True, 'dtype': 'float32', 'padding': ((3, 3), (3, 3)), 'data_format': 'channels_last'}, 'inbound_nodes': [[['input_2', 0, 0, {}]]]}, {'name': 'conv1', 'class_name': 'Conv2D', 'config': {'name': 'conv1', 'trainable': True, 'dtype': 'float32', 'filters': 64, 'kernel_size': (7, 7), 'strides': (2, 2), 'padding': 'valid', 'data_format': 'channels_last', 'dilation_rate': (1, 1), 'activation': 'linear', 'use_bias': True, 'kernel_initializer': {'class_name': 'VarianceScaling', 'config': {'scale': 2.0, 'mode': 'fan_in', 'distribution': 'normal', 'seed': None}}, 'bias_initializer': {'class_name': 'Zeros', 'config': {}}, 'kernel_regularizer': None, 'bias_regularizer': No

{'prediction': ['{"lion": 0.9025413990020752}',
  '{"tiger": 0.8765539526939392}',
  '{"lion": 0.9820299744606018}',
  '{"lion": 0.9987598657608032}',
  '{"tiger": 0.902034342288971}',
  '{"tiger": 0.709359884262085}',
  '{"tiger": 0.6957002878189087}',
  '{"tiger": 0.8612405061721802}',
  '{"lion": 0.9999206066131592}',
  '{"tiger": 0.8114100694656372}',
  '{"lion": 0.9025336503982544}',
  '{"tiger": 0.9146807193756104}',
  '{"tiger": 0.8749715089797974}',
  '{"lion": 0.9996740818023682}',
  '{"lion": 0.9488356709480286}',
  '{"lion": 0.9682839512825012}',
  '{"tiger": 0.6540880799293518}',
  '{"lion": 0.9869621992111206}',
  '{"tiger": 0.9098891019821167}',
  '{"lion": 0.9999099969863892}',
  '{"lion": 0.9999911785125732}',
  '{"tiger": 0.9507588148117065}',
  '{"wallaby": 0.5368982553482056}',
  '{"lion": 0.9304139018058777}',
  '{"lion": 0.9962252378463745}',
  '{"tiger": 0.8375579714775085}',
  '{"lion": 0.9997742772102356}',
  '{"tiger": 0.9813266396522522}',
  '{"lion": 0.992700

In [11]:
# Inspect the weights of the last layer of the RETRAINING-mode model
# (the saved output shows the 'predictions' kernel and an all-zero bias).
retrained_dku_model.model.layers[-1].weights

[MirroredVariable:{
   0: <tf.Variable 'predictions/kernel:0' shape=(100352, 2) dtype=float32, numpy=
 array([[-2.3049410e-03,  2.0940499e-03],
        [-2.7186237e-05, -7.6145246e-03],
        [ 3.5143802e-03, -3.8044048e-03],
        ...,
        [-2.3053540e-03,  1.6650362e-03],
        [ 6.4894762e-03, -7.7259825e-03],
        [ 1.4388580e-03,  6.3173613e-03]], dtype=float32)>
 },
 MirroredVariable:{
   0: <tf.Variable 'predictions/bias:0' shape=(2,) dtype=float32, numpy=array([0., 0.], dtype=float32)>
 }]