In [1]:
%load_ext autoreload
%autoreload 2

from config.rnn import default
from models import RNNV2, RNN, RNNV12
import numpy as np
from functional import seq
import tensorflow as tf
from tensorpack import (TrainConfig, SyncMultiGPUTrainerParameterServer as Trainer, 
                        PredictConfig, MultiProcessDatasetPredictor as Predictor,
                        SaverRestore, logger)
from tensorpack.callbacks import (ScheduledHyperParamSetter, MaxSaver, ModelSaver,
                                  DataParallelInferenceRunner as InfRunner)
from tensorpack.predict import SimpleDatasetPredictor
from tensorpack.tfutils.common import get_default_sess_config
from utils import DataManager, select_label
from utils.validation import (Accumulator, AggregateMetric, calcu_metrics)

# Directory that receives this run's tensorpack logs and checkpoints.
# action='d' is tensorpack's "delete" choice for an already existing log
# directory, so output of a previous run at this path is removed.
log_dir = './train_log/separate/D1-read3-mtd/'
logger.set_logger_dir(log_dir, action='d')

# Checkpoint handed to SaverRestore as `model_path` when training starts.
resnet_loc = "./data/resnet_v2_101/resnet_v2_101.ckpt"

# Checkpoint path of another run; none of the cells visible in this notebook
# read it.
model_loc = "./train_log/separate/read3-noglps-lmd-0/all-stages-max-micro-auc.tfmodel"

[32m[1105 08:34:14 @logger.py:74][0m Argv: /home/yangyang/Applications/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py -f /run/user/1007/jupyter/kernel-5d4da6c1-e5fd-4d18-86c4-95e6d1278021.json


In [2]:
# `config` is another name for the shared `default` object, not a copy: every
# `config.<attr> = ...` assignment in the following cells mutates `default`.
config = default

# Names passed as `ignore=` to SaverRestore when the pre-trained checkpoint is
# loaded, so the checkpoint's learning rate and step counter are not restored.
ignore_restore = ['learning_rate', 'global_step']

# File name of the "best model" checkpoint. The training cell selects that
# checkpoint with MaxSaver('micro_auc', ...), so the name states the metric the
# selection is really based on; the prediction cell reads `log_dir + save_name`
# and therefore follows this name automatically.
save_name = "max-micro_auc.tfmodel"

In [3]:
# First pass over the data: stages 2-6 with a 0.55 / 0.0 / 0.45
# train / val / test proportion. The resulting train and test sets are kept so
# that later cells can build differently configured managers on top of them.
# NOTE(review): annotation_number=None presumably means "no restriction" --
# confirm against DataManager.
config.stages = [2, 3, 4, 5, 6]
config.proportion = dict(train=0.55, val=0.0, test=0.45)
config.annotation_number = None

dm = DataManager.from_config(config)
train_set, test_set = dm.get_train_set(), dm.get_test_set()

In [5]:
# Second pass: narrow the configuration to stage 2 with an 0.8 / 0.2 / 0.0
# train / val / test proportion and rebuild `dm` from the sets produced above.
# NOTE(review): the recorded output still lists a non-empty test split, so the
# proportion presumably only re-splits `train_set` while `test_set` is passed
# through unchanged -- confirm against DataManager.from_dataset.
config.proportion = dict(train=0.8, val=0.2, test=0.0)
config.stages = [2]
config.annotation_number = 10

dm = DataManager.from_dataset(train_set, test_set, config)

# Displayed as the cell result: a dict with one entry per split.
dm.get_num_info()

{'test': (762, 3237), 'train': (692, 3016), 'val': (156, 572)}

In [6]:
# Display the imbalance ratio table of the current data manager: one row per
# label, one column per split (train / val / test). The `train` column of the
# same table is what the training cell passes to the model as `label_weights`.
dm.get_imbalance_ratio()

Unnamed: 0,train,val,test
anlage in statu nascendi,7.439024,7.666667,7.758621
cellular blastoderm,1.005797,1.025974,1.005263
dorsal ectoderm anlage in statu nascendi,6.44086,6.090909,6.398058
faint ubiquitous,5.653846,5.782609,4.953125
maternal,3.907801,4.37931,4.013158
posterior endoderm anlage in statu nascendi,15.878049,14.6,15.565217
procephalic ectoderm anlage in statu nascendi,7.65,7.666667,7.758621
subset,3.552632,3.457143,3.590361
ubiquitous,0.87027,0.714286,0.809976
ventral ectoderm anlage in statu nascendi,7.65,6.8,7.282609


In [7]:
# --- Regularisation / loss settings -----------------------------------------
# A weight decay of 0.0 switches the regularisers off (TensorFlow logs
# "Scale of 0 disables regularizer" while the graph is built).
config.weight_decay = 0.0
config.dropout_keep_prob = 0.4
# NOTE(review): the meaning of `gamma` is not visible in this notebook;
# check the model code before tuning it.
config.gamma = 2

# --- Architecture switches ---------------------------------------------------
config.use_glimpse = False
config.use_hidden_dense = False
config.read_time = 3

# --- Input pipeline -----------------------------------------------------------
config.batch_size = 64

# Passed to AggregateMetric (validation during training) and to calcu_metrics
# (offline evaluation).
# NOTE(review): presumably the cut-off applied to the predicted scores -- confirm.
threshold = 0.4

# Streams are created only after every config field above has been set.
train_data, val_data = dm.get_train_stream(), dm.get_validation_stream()
# To validate on the test split instead, use: val_data = dm.get_test_stream()

In [9]:
# Render numpy floats with two decimals in everything printed from here on.
np.set_printoptions(formatter={'float_kind': lambda value: '%.2f' % value})

# The model receives the per-label imbalance ratios of the training split as
# label weights.
label_weights = dm.get_imbalance_ratio().train.values
model = RNN(config, is_finetuning=False, label_weights=label_weights)
tf.reset_default_graph()

# Learning rate 1e-4 from epoch 0, 1e-5 from epoch 40. With max_epoch=1 below
# only the first entry ever takes effect.
lr_schedule = ScheduledHyperParamSetter('learning_rate', [(0, 1e-4),
                                                          (40, 1e-5)])

# Validation metrics computed on `val_data` by the data-parallel inference
# runner.
# NOTE(review): the trailing [1] is presumably the runner's GPU list (GPU 1,
# while training uses tower 0) -- confirm against the installed tensorpack.
validation_metrics = AggregateMetric(config.validation_metrics, threshold)
validation_runner = InfRunner(val_data, [validation_metrics], [1])

# Periodic checkpoints contain the 'model_variables' collection; MaxSaver keeps
# an extra copy of the checkpoint with the highest 'micro_auc' as `save_name`.
checkpoint_saver = ModelSaver(var_collections='model_variables')
best_model_saver = MaxSaver('micro_auc', save_name)

# Start from the checkpoint at `resnet_loc`, skipping the names listed in
# `ignore_restore`.
pretrained_init = SaverRestore(model_path=resnet_loc, ignore=ignore_restore)

train_config = TrainConfig(
    model=model,
    dataflow=train_data,
    callbacks=[
        lr_schedule,
        validation_runner,
        checkpoint_saver,
        best_model_saver,
    ],
    session_init=pretrained_init,
    max_epoch=1,
    tower=[0],
)
Trainer(train_config).train()

[32m[1105 08:35:16 @inference_runner.py:82][0m InferenceRunner will eval on an InputSource of size 2
[32m[1105 08:35:16 @input_source.py:180][0m Setting up the queue 'QueueInput/input_queue' for CPU prefetching ...
[32m[1105 08:35:16 @training.py:90][0m Building graph for training tower 0 on device LeastLoadedDeviceSetter-/gpu:0...
INFO:tensorflow:Scale of 0 disables regularizer.
INFO:tensorflow:Scale of 0 disables regularizer.
INFO:tensorflow:Scale of 0 disables regularizer.
INFO:tensorflow:Scale of 0 disables regularizer.
INFO:tensorflow:Scale of 0 disables regularizer.
INFO:tensorflow:Scale of 0 disables regularizer.
INFO:tensorflow:Scale of 0 disables regularizer.
INFO:tensorflow:Scale of 0 disables regularizer.
INFO:tensorflow:logits.dtype=<dtype: 'float32'>.
INFO:tensorflow:multi_class_labels.dtype=<dtype: 'float32'>.
INFO:tensorflow:losses.dtype=<dtype: 'float32'>.
[32m[1105 08:35:21 @model_utils.py:47][0m [36mModel Parameters: 
[0mname                            shape

[32m[1105 08:35:26 @base.py:212][0m Creating the session ...
[32m[1105 08:35:28 @base.py:216][0m Initializing the session ...
[32m[1105 08:35:28 @sessinit.py:116][0m Restoring checkpoint from ./data/resnet_v2_101/resnet_v2_101.ckpt ...
INFO:tensorflow:Restoring parameters from ./data/resnet_v2_101/resnet_v2_101.ckpt
[32m[1105 08:35:30 @base.py:223][0m Graph Finalized.
[32m[1105 08:35:30 @param.py:144][0m After epoch 0, learning_rate will change to 0.00010000
[32m[1105 08:35:30 @concurrency.py:36][0m Starting EnqueueThread DataParallelInferenceRunner/QueueInput/input_queue ...
[32m[1105 08:35:32 @concurrency.py:36][0m Starting EnqueueThread QueueInput/input_queue ...
[32m[1105 08:35:32 @base.py:257][0m Start Epoch 1 ...


100%|##########|10/10[00:32<00:00, 0.51it/s]

[32m[1105 08:36:04 @base.py:267][0m Epoch 1 (global_step 10) finished, time:32.19 sec.



  'precision', 'predicted', average, warn_for)
100%|##########|2/2[00:14<00:00, 0.14it/s]


[32m[1105 08:36:20 @saver.py:90][0m Model saved to ./train_log/separate/D1-read3-mtd/model-10.
[32m[1105 08:36:21 @saver.py:159][0m Model with maximum 'micro_auc' saved.
[32m[1105 08:36:21 @monitor.py:362][0m DataParallelInferenceRunner/QueueInput/queue_size: 13
[32m[1105 08:36:21 @monitor.py:362][0m QueueInput/queue_size: 2.8828
[32m[1105 08:36:21 @monitor.py:362][0m coverage: 3.8594
[32m[1105 08:36:21 @monitor.py:362][0m learning_rate: 0.0001
[32m[1105 08:36:21 @monitor.py:362][0m loss/value_1: 0.56055
[32m[1105 08:36:21 @monitor.py:362][0m macro_auc: 0.75422
[32m[1105 08:36:21 @monitor.py:362][0m macro_f1: 0.14321
[32m[1105 08:36:21 @monitor.py:362][0m mean_average_precision: 0.43125
[32m[1105 08:36:21 @monitor.py:362][0m micro_auc: 0.74742
[32m[1105 08:36:21 @monitor.py:362][0m micro_f1: 0.52674
[32m[1105 08:36:21 @monitor.py:362][0m one_error: 0.40625
[32m[1105 08:36:21 @monitor.py:362][0m ranking_loss: 0.24713
[32m[1105 08:36:21 @monitor.py:362][0m r

In [9]:
# Display every variable registered in the MODEL_VARIABLES collection of the
# current default graph. This is the collection name ('model_variables') that
# the training cell hands to ModelSaver, so the list shows what the periodic
# checkpoints contain. The output is long: one entry per variable.
tf.get_collection(tf.GraphKeys.MODEL_VARIABLES)

[<tf.Variable 'resnet_v2_101/conv1/weights:0' shape=(7, 7, 3, 64) dtype=float32_ref>,
 <tf.Variable 'resnet_v2_101/conv1/biases:0' shape=(64,) dtype=float32_ref>,
 <tf.Variable 'resnet_v2_101/block1/unit_1/bottleneck_v2/preact/beta:0' shape=(64,) dtype=float32_ref>,
 <tf.Variable 'resnet_v2_101/block1/unit_1/bottleneck_v2/preact/gamma:0' shape=(64,) dtype=float32_ref>,
 <tf.Variable 'resnet_v2_101/block1/unit_1/bottleneck_v2/preact/moving_mean:0' shape=(64,) dtype=float32_ref>,
 <tf.Variable 'resnet_v2_101/block1/unit_1/bottleneck_v2/preact/moving_variance:0' shape=(64,) dtype=float32_ref>,
 <tf.Variable 'resnet_v2_101/block1/unit_1/bottleneck_v2/shortcut/weights:0' shape=(1, 1, 64, 256) dtype=float32_ref>,
 <tf.Variable 'resnet_v2_101/block1/unit_1/bottleneck_v2/shortcut/biases:0' shape=(256,) dtype=float32_ref>,
 <tf.Variable 'resnet_v2_101/block1/unit_1/bottleneck_v2/conv1/weights:0' shape=(1, 1, 64, 64) dtype=float32_ref>,
 <tf.Variable 'resnet_v2_101/block1/unit_1/bottleneck_v2/co

In [17]:
# Offline evaluation of the best checkpoint written by the training cell.
# NOTE(review): the last recorded run of this cell ended in a GPU
# ResourceExhaustedError while variables were being initialised; presumably GPU
# memory was still held by another session -- confirm, and restart the kernel
# (or free the GPU) before evaluating.
model = RNN(config, is_finetuning=False)
tf.reset_default_graph()

# Build the test stream inside this cell so that it runs on a fresh kernel
# instead of depending on a `test_data` variable left over from earlier work.
test_data = dm.get_test_stream()

pred_config = PredictConfig(model=model,
                            session_init=SaverRestore(
                                model_path=log_dir + save_name),
                            output_names=['logits_export', 'label'],
                            )
# Multi-process alternative, kept for reference:
# pred = Predictor(pred_config, test_data, nr_proc=2, ordered=False)
pred = SimpleDatasetPredictor(pred_config, test_data)

# Each prediction result is a (logits, labels) pair, matching `output_names`.
# It is turned into (batch size, metric values) and folded into one
# Accumulator, which combines the per-batch metric values.
accumulator = seq(pred.get_result()) \
    .smap(lambda a, b: (a.shape[0], calcu_metrics(a, b, config.validation_metrics, threshold))) \
    .aggregate(Accumulator(*config.validation_metrics), lambda accu, args: accu.feed(args[0], *args[1]))
# `retrive` (sic) is the accessor name used in this cell for the Accumulator.
metrics = accumulator.retrive()
print(metrics)

INFO:tensorflow:Scale of 0 disables regularizer.
INFO:tensorflow:Scale of 0 disables regularizer.
INFO:tensorflow:Scale of 0 disables regularizer.
INFO:tensorflow:Scale of 0 disables regularizer.
INFO:tensorflow:Scale of 0 disables regularizer.
INFO:tensorflow:Scale of 0 disables regularizer.
INFO:tensorflow:Scale of 0 disables regularizer.
INFO:tensorflow:Scale of 0 disables regularizer.
[32m[1104 15:51:24 @collection.py:133][0m New collections created in : tf.GraphKeys.MODEL_VARIABLES, resnet_v2_101_end_points
[32m[1104 15:51:24 @collection.py:152][0m These collections were modified but restored in : (tf.GraphKeys.SUMMARIES: 0->2), (tf.GraphKeys.UPDATE_OPS: 0->2)


ResourceExhaustedError: OOM when allocating tensor with shape[3,3,1024,512]
	 [[Node: custom_cnn/conv1/weights/Assign = Assign[T=DT_FLOAT, _class=["loc:@custom_cnn/conv1/weights"], use_locking=true, validate_shape=true, _device="/job:localhost/replica:0/task:0/gpu:0"](custom_cnn/conv1/weights, custom_cnn/conv1/weights/Initializer/truncated_normal)]]

Caused by op 'custom_cnn/conv1/weights/Assign', defined at:
  File "/home/yangyang/Applications/anaconda3/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/yangyang/Applications/anaconda3/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/yangyang/Applications/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/yangyang/Applications/anaconda3/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/yangyang/Applications/anaconda3/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 477, in start
    ioloop.IOLoop.instance().start()
  File "/home/yangyang/Applications/anaconda3/lib/python3.6/site-packages/zmq/eventloop/ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "/home/yangyang/Applications/anaconda3/lib/python3.6/site-packages/tornado/ioloop.py", line 888, in start
    handler_func(fd_obj, events)
  File "/home/yangyang/Applications/anaconda3/lib/python3.6/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/yangyang/Applications/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/home/yangyang/Applications/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/home/yangyang/Applications/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/home/yangyang/Applications/anaconda3/lib/python3.6/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/yangyang/Applications/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/home/yangyang/Applications/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 235, in dispatch_shell
    handler(stream, idents, msg)
  File "/home/yangyang/Applications/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/home/yangyang/Applications/anaconda3/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/yangyang/Applications/anaconda3/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 533, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/yangyang/Applications/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2698, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/yangyang/Applications/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2802, in run_ast_nodes
    if self.run_code(code, result):
  File "/home/yangyang/Applications/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-17-5f1d17c4166a>", line 9, in <module>
    pred = SimpleDatasetPredictor(pred_config, test_data)
  File "/home/yangyang/Applications/anaconda3/lib/python3.6/site-packages/tensorpack/predict/dataset.py", line 66, in __init__
    self.predictor = OfflinePredictor(config)
  File "/home/yangyang/Applications/anaconda3/lib/python3.6/site-packages/tensorpack/predict/base.py", line 161, in __init__
    config.tower_func(*input.get_input_tensors())
  File "/home/yangyang/Applications/anaconda3/lib/python3.6/site-packages/tensorpack/tfutils/tower.py", line 198, in __call__
    output = self._tower_fn(*args)
  File "/home/yangyang/Applications/anaconda3/lib/python3.6/site-packages/tensorpack/graph_builder/model_desc.py", line 119, in build_graph
    self._build_graph(inputs)
  File "/home/yangyang/Documents/flyexpress/DL_biomedicine_image/models/rnn.py", line 192, in _build_graph
    feature = extract_feature_resnet(image, ctx.is_training, self.is_finetuning, self.config.weight_decay)
  File "/home/yangyang/Documents/flyexpress/DL_biomedicine_image/models/image_utils.py", line 60, in extract_feature_resnet
    conv = slim.conv2d(feature, 512, (3, 3), stride=2, scope='conv1')
  File "/home/yangyang/Applications/anaconda3/lib/python3.6/site-packages/tensorflow/contrib/framework/python/ops/arg_scope.py", line 181, in func_with_args
    return func(*args, **current_args)
  File "/home/yangyang/Applications/anaconda3/lib/python3.6/site-packages/tensorflow/contrib/layers/python/layers/layers.py", line 1027, in convolution
    outputs = layer.apply(inputs)
  File "/home/yangyang/Applications/anaconda3/lib/python3.6/site-packages/tensorflow/python/layers/base.py", line 503, in apply
    return self.__call__(inputs, *args, **kwargs)
  File "/home/yangyang/Applications/anaconda3/lib/python3.6/site-packages/tensorflow/python/layers/base.py", line 443, in __call__
    self.build(input_shapes[0])
  File "/home/yangyang/Applications/anaconda3/lib/python3.6/site-packages/tensorflow/python/layers/convolutional.py", line 137, in build
    dtype=self.dtype)
  File "/home/yangyang/Applications/anaconda3/lib/python3.6/site-packages/tensorflow/python/layers/base.py", line 383, in add_variable
    trainable=trainable and self.trainable)
  File "/home/yangyang/Applications/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py", line 1065, in get_variable
    use_resource=use_resource, custom_getter=custom_getter)
  File "/home/yangyang/Applications/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py", line 962, in get_variable
    use_resource=use_resource, custom_getter=custom_getter)
  File "/home/yangyang/Applications/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py", line 360, in get_variable
    validate_shape=validate_shape, use_resource=use_resource)
  File "/home/yangyang/Applications/anaconda3/lib/python3.6/site-packages/tensorflow/contrib/layers/python/layers/layers.py", line 1561, in layer_variable_getter
    return _model_variable_getter(getter, *args, **kwargs)
  File "/home/yangyang/Applications/anaconda3/lib/python3.6/site-packages/tensorflow/contrib/layers/python/layers/layers.py", line 1553, in _model_variable_getter
    custom_getter=getter, use_resource=use_resource)
  File "/home/yangyang/Applications/anaconda3/lib/python3.6/site-packages/tensorflow/contrib/framework/python/ops/arg_scope.py", line 181, in func_with_args
    return func(*args, **current_args)
  File "/home/yangyang/Applications/anaconda3/lib/python3.6/site-packages/tensorflow/contrib/framework/python/ops/variables.py", line 261, in model_variable
    use_resource=use_resource)
  File "/home/yangyang/Applications/anaconda3/lib/python3.6/site-packages/tensorflow/contrib/framework/python/ops/arg_scope.py", line 181, in func_with_args
    return func(*args, **current_args)
  File "/home/yangyang/Applications/anaconda3/lib/python3.6/site-packages/tensorflow/contrib/framework/python/ops/variables.py", line 216, in variable
    use_resource=use_resource)
  File "/home/yangyang/Applications/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py", line 352, in _true_getter
    use_resource=use_resource)
  File "/home/yangyang/Applications/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py", line 725, in _get_single_variable
    validate_shape=validate_shape)
  File "/home/yangyang/Applications/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/variables.py", line 199, in __init__
    expected_shape=expected_shape)
  File "/home/yangyang/Applications/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/variables.py", line 320, in _init_from_args
    validate_shape=validate_shape).op
  File "/home/yangyang/Applications/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/state_ops.py", line 274, in assign
    validate_shape=validate_shape)
  File "/home/yangyang/Applications/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/gen_state_ops.py", line 43, in assign
    use_locking=use_locking, name=name)
  File "/home/yangyang/Applications/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 767, in apply_op
    op_def=op_def)
  File "/home/yangyang/Applications/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 2630, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/home/yangyang/Applications/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1204, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

ResourceExhaustedError (see above for traceback): OOM when allocating tensor with shape[3,3,1024,512]
	 [[Node: custom_cnn/conv1/weights/Assign = Assign[T=DT_FLOAT, _class=["loc:@custom_cnn/conv1/weights"], use_locking=true, validate_shape=true, _device="/job:localhost/replica:0/task:0/gpu:0"](custom_cnn/conv1/weights, custom_cnn/conv1/weights/Initializer/truncated_normal)]]


In [None]:
import pickle

In [None]:
with open