In [1]:
import dataiku
from dataiku import pandasutils as pdu
import pandas as pd
%config Application.log_level='ERROR'

In [2]:
dataiku.use_plugin_libs('deeplearning-image-v2')

In [3]:
RETRAIN_IMAGES_FOLDER_ID = 'Images_for_retraining'
IMAGES_TO_CLASSIFY_FOLDER_ID = 'images_to_classify_sample'
LABELS_DATASET_ID = 'labels'
RESNET_MODEL_FOLDER_ID = 'resnet_model'
OUTPUT_RETRAINED_MODEL_ID = 'retrained_model'
OUTPUT_SCORED_DATASET_ID = 'classified_images'

# RETRAIN RECIPE

In [4]:
import dku_deeplearning_image.utils as utils
import dku_deeplearning_image.constants as constants

from recipe import RetrainRecipe
from config import RetrainConfig
from utils_objects import DkuModel
from utils_objects import DkuFileManager

RETRAIN_RECIPE_CONFIG = {'n_classes':1000, 'random_seed': 1338, 'train_ratio': 0.9, 'gpu_usage': 'all', 'gpu_memory': 'all', 'layer_to_retrain': 'last', 'layer_to_retrain_n': 2, 'model_dropout': 0.5, 'model_reg': {'l2': 0, 'l1': 0}, 'model_optimizer': 'adam', 'model_learning_rate': 0.001, 'batch_size': 10, 'nb_epochs': 5, 'nb_steps_per_epoch': 5, 'nb_validation_steps': 5, 'model_custom_params_opti': [], 'n_augmentation': 2, 'model_custom_params_data_augmentation': [], 'data_augmentation': False, 'tensorboard': True, 'should_use_gpu': False, 'model_pooling': 'avg', 'image_width': 197, 'image_height': 197, 'col_filename': 'path', 'col_label': 'label'}
#RETRAIN_RECIPE_CONFIG = {'n_classes': 1000,'max_nb_labels':1000, 'min_threshold': 0.0, 'gpu_usage': 'all', 'gpu_memory': 'all', 'should_use_gpu': False}

def get_retrain_recipe_config():
    return RETRAIN_RECIPE_CONFIG


def get_retrain_input_output():
    file_manager = DkuFileManager()
    image_folder = dataiku.Folder(RETRAIN_IMAGES_FOLDER_ID)
    model_folder = dataiku.Folder(RESNET_MODEL_FOLDER_ID)
    label_dataset = dataiku.Dataset(LABELS_DATASET_ID)
    output_folder = dataiku.Folder(OUTPUT_RETRAINED_MODEL_ID)
    return image_folder, label_dataset, model_folder, output_folder


def format_label_df(label_dataset, col_filename, col_label):
    renaming_mapping = {
        col_filename: constants.FILENAME,
        col_label: constants.LABEL
    }
    label_df = label_dataset.get_dataframe().rename(columns=renaming_mapping)[list(renaming_mapping.values())]
    return label_df


def save_output_model(output_folder, model):
    output_model = model.deepcopy(folder=output_folder)
    output_model.save_to_folder()


def get_retrain_recipe():
    recipe_config = get_retrain_recipe_config()
    config = RetrainConfig(recipe_config)
    recipe = RetrainRecipe(config)
    return recipe

""" 
@utils.log_func(txt='recipe')
def run_retrain():
    recipe_config = get_retrain_recipe_config()
    config = RetrainConfig(recipe_config)
    image_folder, label_dataset, model_folder, output_folder = get_retrain_input_output()
    print('wtf')
    label_df = format_label_df(label_dataset, config.col_filename, config.col_label)
    retrain_recipe = get_retrain_recipe()
    new_model = retrain_recipe.compute(image_folder, model_folder, label_df, output_folder)
    return new_model
    # save_output_model(output_folder, new_model)
""" 

Using TensorFlow backend.


" \n@utils.log_func(txt='recipe')\ndef run_retrain():\n    recipe_config = get_retrain_recipe_config()\n    config = RetrainConfig(recipe_config)\n    image_folder, label_dataset, model_folder, output_folder = get_retrain_input_output()\n    print('wtf')\n    label_df = format_label_df(label_dataset, config.col_filename, config.col_label)\n    retrain_recipe = get_retrain_recipe()\n    new_model = retrain_recipe.compute(image_folder, model_folder, label_df, output_folder)\n    return new_model\n    # save_output_model(output_folder, new_model)\n"

# SCORE RECIPE

In [5]:
import pandas as pd
import dku_deeplearning_image.utils as utils

from recipe import ScoreRecipe
from config import ScoreConfig
from utils_objects import DkuFileManager

SCORING_RECIPE_CONFIG = {'n_classes': 1000,'max_nb_labels':1000, 'min_threshold': 0.0, 'gpu_usage': 'all', 'gpu_memory': 'all', 'should_use_gpu': False}

def get_score_recipe_config():
    return SCORING_RECIPE_CONFIG

def get_score_input_output():
    file_manager = DkuFileManager()
    image_folder = dataiku.Folder(IMAGES_TO_CLASSIFY_FOLDER_ID)
    model_folder = dataiku.Folder(RESNET_MODEL_FOLDER_ID)
    output_dataset = dataiku.Folder(OUTPUT_SCORED_DATASET_ID)
    return image_folder, model_folder, output_dataset


@utils.log_func(txt='output dataset writing')
def write_output_dataset(output_dataset, image_folder, classification):
    images_paths = image_folder.list_paths_in_partition()
    output_df = utils.build_prediction_output_df(images_paths, classification)
    output_dataset.write_with_schema(pd.DataFrame(output_df))


def get_score_recipe():
    recipe_config = get_score_recipe_config()
    config = ScoreConfig(recipe_config)
    recipe = ScoreRecipe(config)
    return recipe


""" 
@utils.log_func(txt='recipe')
def run():
    recipe_config = get_score_recipe_config()
    config = ScoreConfig(recipe_config)

    image_folder, model_folder, output_dataset = get_score_input_output()
    recipe = get_score_recipe()

    classification = recipe.compute(image_folder, model_folder)

    return classification

    # write_output_dataset(output_dataset, image_folder, classification)
""" 

" \n@utils.log_func(txt='recipe')\ndef run():\n    recipe_config = get_score_recipe_config()\n    config = ScoreConfig(recipe_config)\n\n    image_folder, model_folder, output_dataset = get_score_input_output()\n    recipe = get_score_recipe()\n\n    classification = recipe.compute(image_folder, model_folder)\n\n    return classification\n\n    # write_output_dataset(output_dataset, image_folder, classification)\n"

In [6]:
def get_prediction_test(dku_mdl, shape=False):
    if shape:
        dku_mdl.application.input_shape = (197, 197, 3)
    retrained_scores = dku_mdl.score_image_folder(
        images_folder=dataiku.Folder(IMAGES_TO_CLASSIFY_FOLDER_ID),
        limit=SCORING_RECIPE_CONFIG['max_nb_labels'],
        min_threshold=SCORING_RECIPE_CONFIG['min_threshold']
    )
    return retrained_scores

In [8]:
score_recipe_config = get_score_recipe_config()
score_config = ScoreConfig(score_recipe_config)
image_folder, model_folder, output_dataset = get_score_input_output()

scoring_dku_model = DkuModel(model_folder)
scoring_dku_model.load_model(score_config, constants.SCORING)

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:CPU:0',)


  tensor_proto.tensor_content = nparray.tostring()
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_nam

  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attrib

  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attrib

  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)


In [15]:
retrain_recipe_config = get_retrain_recipe_config()
retrain_config = RetrainConfig(retrain_recipe_config)
image_folder, label_dataset, model_folder, output_folder = get_retrain_input_output()
label_df = format_label_df(label_dataset, retrain_config.col_filename, retrain_config.col_label)

retrained_dku_model = DkuModel(model_folder)
#retrained_dku_model.set_distinct_labels(LABEL_LIST)
retrained_dku_model.set_label_df(scoring_dku_model.label_df) #label_df
retrained_dku_model.load_model(retrain_config, constants.RETRAINING)

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:CPU:0',)


  tensor_proto.tensor_content = nparray.tostring()
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_nam

  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attrib

  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attrib

  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)
  check_attribute_name(name)


enrich top param
Get distinct labels
Get label df
Build label df
coucou1 (None, 7, 7, 2048)
coucou2 (None, 2048) avg
x :  Tensor("global_average_pooling2d_1/Mean:0", shape=(None, 2048), dtype=float32)
predictions :  Tensor("predictions/Softmax:0", shape=(None, 998), dtype=float32)
predictions weights :  [array([[ 0.02154265, -0.00245228,  0.01506522, ..., -0.00348612,
         0.01923618, -0.01933027],
       [-0.00948967,  0.01623805,  0.0002616 , ...,  0.02254168,
        -0.03848182, -0.01094262],
       [ 0.03947154, -0.0358363 ,  0.02844768, ...,  0.00820304,
        -0.02399672, -0.01481179],
       ...,
       [ 0.01814706, -0.0225831 ,  0.03542458, ..., -0.02254002,
        -0.03549471, -0.03627592],
       [ 0.04106516,  0.00524502,  0.00167912, ...,  0.03658203,
         0.01550507,  0.04239261],
       [-0.02519783, -0.02952943, -0.01780739, ..., -0.01449656,
        -0.02051874, -0.01815059]], dtype=float32), array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.

  self.model = Model(input=base_model.input, output=predictions)


In [None]:
retrained_dku_model.model.summary()

In [None]:
len(retrained_dku_model.model.layers)

In [None]:
scoring_dku_model.model.summary()

In [None]:
len(scoring_dku_model.model.layers)

In [None]:
retrained_dku_model.label_df

In [None]:
get_prediction_test(retrained_dku_model, shape=True)

In [None]:
scoring_dku_model.model.layers[-1].kernel_regularizer

In [None]:
retrained_dku_model.model.layers[-1].kernel_regularizer

In [10]:
import json

list(json.loads(pr['prediction'][0]).keys())

NameError: name 'pr' is not defined

In [11]:
get_prediction_test(scoring_dku_model, shape=False)

Get label df
Build label df
model.get_config() :  {'name': 'resnet50', 'layers': [{'name': 'input_1', 'class_name': 'InputLayer', 'config': {'batch_input_shape': (None, 224, 224, 3), 'dtype': 'float32', 'sparse': False, 'name': 'input_1'}, 'inbound_nodes': []}, {'name': 'conv1_pad', 'class_name': 'ZeroPadding2D', 'config': {'name': 'conv1_pad', 'trainable': True, 'dtype': 'float32', 'padding': ((3, 3), (3, 3)), 'data_format': 'channels_last'}, 'inbound_nodes': [[['input_1', 0, 0, {}]]]}, {'name': 'conv1', 'class_name': 'Conv2D', 'config': {'name': 'conv1', 'trainable': True, 'dtype': 'float32', 'filters': 64, 'kernel_size': (7, 7), 'strides': (2, 2), 'padding': 'valid', 'data_format': 'channels_last', 'dilation_rate': (1, 1), 'activation': 'linear', 'use_bias': True, 'kernel_initializer': {'class_name': 'VarianceScaling', 'config': {'scale': 2.0, 'mode': 'fan_in', 'distribution': 'normal', 'seed': None}}, 'bias_initializer': {'class_name': 'Zeros', 'config': {}}, 'kernel_regularizer': 

PREDICTION [[2.2627104e-08 6.7665539e-08 3.0595615e-08 ... 7.6209410e-08
  3.3609123e-08 5.4573281e-08]
 [3.6056480e-08 1.2215345e-08 2.3474573e-08 ... 9.4195940e-09
  4.2739962e-06 1.0346831e-06]]


{'prediction': ['{"tiger": 0.8506215810775757, "tiger_cat": 0.14790935814380646, "jaguar": 0.0006382923456840217, "zebra": 0.0002970591885969043, "lynx": 9.156941814580932e-05, "tabby": 3.314833884360269e-05, "leopard": 3.194835517206229e-05, "common_iguana": 2.59058997471584e-05, "Egyptian_cat": 1.7533655409351923e-05, "Madagascar_cat": 1.5622592400177382e-05, "red_wolf": 1.4412947166420054e-05, "dhole": 9.513076292932965e-06, "patas": 9.504171430307906e-06, "frilled_lizard": 8.699903446540702e-06, "coyote": 7.871093657740857e-06, "boxer": 7.0975461312627885e-06, "Rhodesian_ridgeback": 6.4309851950383745e-06, "whiptail": 6.39881818642607e-06, "impala": 5.962898285361007e-06, "cougar": 5.8718910622701515e-06, "gazelle": 5.558254997595213e-06, "Leonberg": 5.511882136488566e-06, "greenhouse": 5.147187948750798e-06, "whippet": 4.635635377781e-06, "Great_Dane": 4.512550276558613e-06, "red_fox": 3.947306140617002e-06, "dalmatian": 3.8737575778213795e-06, "tile_roof": 3.653740350273438e-06, 

In [14]:
scoring_dku_model.label_df

Unnamed: 0_level_0,__dku__image_label
id,Unnamed: 1_level_1
0,tench
1,goldfish
2,great_white_shark
3,tiger_shark
4,hammerhead
...,...
995,earthstar
996,hen-of-the-woods
997,bolete
998,ear


In [13]:
import json
list(json.loads(pr['prediction'][0]).keys())

NameError: name 'pr' is not defined

In [None]:
retrained_dku_model.model.layers[-1].weights

In [None]:
pd.DataFrame(['lion', 'tiger'])

In [None]:
pd.DataFrame([{'id': 0, '__dku__image_label': 'lion'}, {'id': 1, '__dku__image_label': 'tiger'}]).set_index('id')

In [None]:
retrained_dku_model

In [None]:
retrained_dku_model.model.predict()

In [None]:
test_image = image_folder.get_download_stream('/2_lion.jpg')

In [None]:
test_image

In [None]:
preprocessed_img = utils.preprocess_img(
    img_path=test_image,
    img_shape=scoring_dku_model.get_input_shape(),
    preprocessing=scoring_dku_model.application.preprocessing
)

In [None]:
scoring_dku_model.model.predict(np.array([preprocessed_img]))

In [None]:
retrained_dku_model.get_input_shape()

In [None]:
import numpy as np

In [None]:
np.array([preprocessed_img])