In [12]:
%load_ext autoreload
%autoreload 2

from config.rnn import default
from models import RNNV2, RNN, RNNV12
import numpy as np
from functional import seq
import tensorflow as tf
from tensorpack import (TrainConfig, SyncMultiGPUTrainerParameterServer as Trainer, 
                        PredictConfig, MultiProcessDatasetPredictor as Predictor,
                        SaverRestore, logger)
from tensorpack.callbacks import (ScheduledHyperParamSetter, MaxSaver, ModelSaver,
                                  DataParallelInferenceRunner as InfRunner)
from tensorpack.predict import SimpleDatasetPredictor
from tensorpack.tfutils.common import get_default_sess_config
from utils import DataManager
from utils.validation import (Accumulator, AggregateMetric, calcu_metrics)

# Checkpoint handed to SaverRestore in the training cell as the initial weights.
resnet_loc = "./data/resnet_v2_101/resnet_v2_101.ckpt"
# Directory that tensorpack's logger (and the savers in the training cell) write into.
log_dir = './train_log/all-stage/'
# action='d': per tensorpack's logger documentation, an existing directory is
# deleted rather than prompting, so re-running this cell discards whatever a
# previous run left in log_dir.
logger.set_logger_dir(log_dir, action='d')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
[32m[1103 17:53:53 @logger.py:74][0m Argv: /home/yangyang/Applications/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py -f /run/user/1007/jupyter/kernel-c5773769-f885-44ee-affa-4308d0f383e1.json


In [13]:
# `config` is another name for the imported `default` object, not a copy:
# every `config.<attr> = ...` in the cells below mutates `default` itself.
config = default
# Names passed as `ignore=` to SaverRestore in the training cell.
ignore_restore = ['learning_rate', 'global_step']
# File name passed to MaxSaver for the checkpoint with the best 'micro_auc'.
save_name = "all-stages-max-micro-auc.tfmodel"

In [14]:
# First pass: build a manager straight from the config with a train/test
# proportion only (validation share is 0.0), then keep both resulting sets so
# the next cell can rebuild a manager from them.
config.stages = list(range(2, 7))  # [2, 3, 4, 5, 6]
config.proportion = dict(train=0.55, val=0.0, test=0.45)
# presumably None means "no limit on annotations" - TODO confirm against DataManager
config.annotation_number = None
dm = DataManager.from_config(config)
train_set, test_set = dm.get_train_set(), dm.get_test_set()

In [15]:
# Second pass: rebind `dm` to a manager built from the sets produced above,
# this time with a 0.7/0.3 train/val proportion and annotation_number = 20.
# NOTE(review): the recorded output still reports a non-empty 'test' split
# although the test proportion is 0.0 - presumably from_dataset keeps
# `test_set` as-is and only re-splits `train_set`; confirm in DataManager.
config.proportion = dict(train=0.7, val=0.3, test=0.0)
config.stages = list(range(2, 7))  # [2, 3, 4, 5, 6]
config.annotation_number = 20
dm = DataManager.from_dataset(train_set, test_set, config)
# Last expression of the cell, so its return value is what the cell displays.
dm.get_num_info()

{'test': (3780, 16120), 'train': (2999, 13163), 'val': (1177, 5297)}

In [16]:
# Display the per-label imbalance table (columns train / val / test in the
# recorded output); its `train` column is reused as `label_scale` for the model.
dm.get_imbalance_ratio()

Unnamed: 0,train,val,test
anterior midgut primordium,8.115502,7.654412,8.264706
brain primordium,12.09607,11.934066,12.309859
cellular blastoderm,9.063758,8.491935,8.947368
embryonic brain,8.401254,8.34127,8.379653
embryonic central nervous system,12.884259,11.134021,13.482759
embryonic dorsal epidermis,11.60084,10.653465,11.813559
embryonic hindgut,10.025735,9.603604,10.183432
embryonic midgut,5.694196,5.575419,5.65493
embryonic ventral epidermis,13.146226,11.934066,13.538462
embryonic/larval muscle system,12.09607,12.077778,12.170732


In [17]:
# Hyper-parameters for this run, written onto the shared config object.
config.weight_decay = 0.0
config.dropout_keep_prob = 0.5
config.gamma = 2
# FIXME(review): this line originally read `config.use_glimpse = ` with no
# right-hand side, which is a SyntaxError and stops the whole cell from
# running. The intended value cannot be recovered from the notebook, so the
# flag is left at whatever `config` already carries; assign it explicitly
# here once the intended setting has been confirmed.
config.doubly_stochastic_lambda = 1e-3
config.read_time = 5
config.batch_size = 64

# Passed to AggregateMetric in the training cell.
# presumably the decision cut-off for the thresholded metrics - TODO confirm
threshold = 0.4
# Data streams consumed by TrainConfig (training) and the inference runner
# (validation) in the training cell.
train_data = dm.get_train_stream()
val_data = dm.get_validation_stream()

In [18]:
# Render floats in printed numpy arrays with two decimals.
np.set_printoptions(formatter={'float_kind': lambda value: '%.2f' % value})

# Per-label scale taken from the `train` column of the imbalance table.
label_scale = dm.get_imbalance_ratio().train.values
model = RNN(config, is_finetuning=False, label_scale=label_scale)
# NOTE(review): the default graph is reset *after* the model object exists;
# that is only harmless if RNN.__init__ creates no TensorFlow ops - confirm.
tf.reset_default_graph()

# (epoch, value) pairs for the learning rate.
# NOTE(review): with max_epoch=22 below, the epoch-40 entry is never reached
# and the epoch-15 entry re-sets the value that is already in effect.
lr_schedule = [(0, 1e-4), (15, 1e-4), (40, 1e-5)]

callbacks = [
    ScheduledHyperParamSetter('learning_rate', lr_schedule),
    # Validation pass on tower 0, aggregating config.validation_metrics.
    InfRunner(val_data,
              [AggregateMetric(config.validation_metrics, threshold)],
              [0]),
    ModelSaver(var_collections='model_variables'),
    # Keeps a separate checkpoint for the best 'micro_auc' seen so far.
    MaxSaver('micro_auc', save_name),
]

# Start from the ResNet checkpoint, skipping the names in ignore_restore.
session_init = SaverRestore(model_path=resnet_loc, ignore=ignore_restore)

train_config = TrainConfig(
    model=model,
    dataflow=train_data,
    callbacks=callbacks,
    session_init=session_init,
    max_epoch=22,
    tower=[0],
)
Trainer(train_config).train()

[32m[1103 17:54:01 @inference_runner.py:82][0m InferenceRunner will eval on an InputSource of size 18
[32m[1103 17:54:01 @input_source.py:180][0m Setting up the queue 'QueueInput/input_queue' for CPU prefetching ...
[32m[1103 17:54:01 @training.py:90][0m Building graph for training tower 0 on device LeastLoadedDeviceSetter-/gpu:0...
INFO:tensorflow:Scale of 0 disables regularizer.
INFO:tensorflow:Scale of 0 disables regularizer.
INFO:tensorflow:Scale of 0 disables regularizer.
INFO:tensorflow:Scale of 0 disables regularizer.
INFO:tensorflow:Scale of 0 disables regularizer.
INFO:tensorflow:Scale of 0 disables regularizer.
INFO:tensorflow:Scale of 0 disables regularizer.
INFO:tensorflow:Scale of 0 disables regularizer.
INFO:tensorflow:Scale of 0 disables regularizer.
INFO:tensorflow:Scale of 0 disables regularizer.
INFO:tensorflow:Scale of 0 disables regularizer.
INFO:tensorflow:Scale of 0 disables regularizer.
INFO:tensorflow:Scale of 0 disables regularizer.
INFO:tensorflow:Scale 

[32m[1103 17:54:11 @base.py:212][0m Creating the session ...
[32m[1103 17:54:13 @base.py:216][0m Initializing the session ...
[32m[1103 17:54:13 @sessinit.py:116][0m Restoring checkpoint from ./data/resnet_v2_101/resnet_v2_101.ckpt ...
INFO:tensorflow:Restoring parameters from ./data/resnet_v2_101/resnet_v2_101.ckpt
[32m[1103 17:54:14 @base.py:223][0m Graph Finalized.
[32m[1103 17:54:14 @param.py:144][0m After epoch 0, learning_rate will change to 0.00010000
[32m[1103 17:54:15 @concurrency.py:36][0m Starting EnqueueThread DataParallelInferenceRunner/QueueInput/input_queue ...
[32m[1103 17:54:16 @concurrency.py:36][0m Starting EnqueueThread QueueInput/input_queue ...
[32m[1103 17:54:16 @base.py:257][0m Start Epoch 1 ...


100%|##########|46/46[01:44<00:00, 0.44it/s]

[32m[1103 17:56:00 @base.py:267][0m Epoch 1 (global_step 46) finished, time:104.45 sec.



  'precision', 'predicted', average, warn_for)
100%|##########|18/18[00:28<00:00, 0.59it/s]


[32m[1103 17:56:30 @saver.py:90][0m Model saved to ./train_log/all-stage/model-46.
[32m[1103 17:56:31 @saver.py:159][0m Model with maximum 'micro_auc' saved.
[32m[1103 17:56:31 @monitor.py:362][0m DataParallelInferenceRunner/QueueInput/queue_size: 49
[32m[1103 17:56:31 @monitor.py:362][0m QueueInput/queue_size: 4.55
[32m[1103 17:56:31 @monitor.py:362][0m coverage: 8.8637
[32m[1103 17:56:31 @monitor.py:362][0m learning_rate: 0.0001
[32m[1103 17:56:31 @monitor.py:362][0m loss/value: 0.27497
[32m[1103 17:56:31 @monitor.py:362][0m macro_auc: 0.82476
[32m[1103 17:56:31 @monitor.py:362][0m macro_f1: 0.1398
[32m[1103 17:56:31 @monitor.py:362][0m mean_average_precision: 0.42967
[32m[1103 17:56:31 @monitor.py:362][0m micro_auc: 0.63218
[32m[1103 17:56:31 @monitor.py:362][0m micro_f1: 0.22909
[32m[1103 17:56:31 @monitor.py:362][0m one_error: 0.8941
[32m[1103 17:56:31 @monitor.py:362][0m ranking_loss: 0.36868
[32m[1103 17:56:31 @monitor.py:362][0m ranking_mean_averag

100%|##########|46/46[01:21<00:00, 0.56it/s]

[32m[1103 17:57:53 @base.py:267][0m Epoch 2 (global_step 92) finished, time:81.60 sec.



100%|##########|18/18[00:28<00:00, 0.65it/s]


[32m[1103 17:58:21 @saver.py:90][0m Model saved to ./train_log/all-stage/model-92.
[32m[1103 17:58:22 @saver.py:159][0m Model with maximum 'micro_auc' saved.
[32m[1103 17:58:22 @monitor.py:362][0m DataParallelInferenceRunner/QueueInput/queue_size: 49.871
[32m[1103 17:58:22 @monitor.py:362][0m QueueInput/queue_size: 25.804
[32m[1103 17:58:22 @monitor.py:362][0m coverage: 8.5148
[32m[1103 17:58:22 @monitor.py:362][0m learning_rate: 0.0001
[32m[1103 17:58:22 @monitor.py:362][0m loss/value: 0.22649
[32m[1103 17:58:22 @monitor.py:362][0m macro_auc: 0.87087
[32m[1103 17:58:22 @monitor.py:362][0m macro_f1: 0.16035
[32m[1103 17:58:22 @monitor.py:362][0m mean_average_precision: 0.49646
[32m[1103 17:58:22 @monitor.py:362][0m micro_auc: 0.6829
[32m[1103 17:58:22 @monitor.py:362][0m micro_f1: 0.25494
[32m[1103 17:58:22 @monitor.py:362][0m one_error: 0.86632
[32m[1103 17:58:22 @monitor.py:362][0m ranking_loss: 0.34894
[32m[1103 17:58:22 @monitor.py:362][0m ranking_mean

100%|##########|46/46[01:23<00:00, 0.54it/s]

[32m[1103 17:59:46 @base.py:267][0m Epoch 3 (global_step 138) finished, time:83.60 sec.



100%|##########|18/18[00:25<00:00, 0.74it/s]


[32m[1103 18:00:11 @saver.py:90][0m Model saved to ./train_log/all-stage/model-138.
[32m[1103 18:00:12 @saver.py:159][0m Model with maximum 'micro_auc' saved.
[32m[1103 18:00:12 @monitor.py:362][0m DataParallelInferenceRunner/QueueInput/queue_size: 49.178
[32m[1103 18:00:12 @monitor.py:362][0m QueueInput/queue_size: 47.674
[32m[1103 18:00:12 @monitor.py:362][0m coverage: 8.3238
[32m[1103 18:00:12 @monitor.py:362][0m learning_rate: 0.0001
[32m[1103 18:00:12 @monitor.py:362][0m loss/value: 0.20087
[32m[1103 18:00:12 @monitor.py:362][0m macro_auc: 0.88923
[32m[1103 18:00:12 @monitor.py:362][0m macro_f1: 0.22925
[32m[1103 18:00:12 @monitor.py:362][0m mean_average_precision: 0.54392
[32m[1103 18:00:12 @monitor.py:362][0m micro_auc: 0.71221
[32m[1103 18:00:12 @monitor.py:362][0m micro_f1: 0.26351
[32m[1103 18:00:12 @monitor.py:362][0m one_error: 0.74219
[32m[1103 18:00:12 @monitor.py:362][0m ranking_loss: 0.33013
[32m[1103 18:00:12 @monitor.py:362][0m ranking_me

100%|##########|46/46[01:22<00:00, 0.57it/s]

[32m[1103 18:01:34 @base.py:267][0m Epoch 4 (global_step 184) finished, time:82.66 sec.



100%|##########|18/18[00:24<00:00, 0.70it/s]


[32m[1103 18:02:00 @saver.py:90][0m Model saved to ./train_log/all-stage/model-184.
[32m[1103 18:02:00 @saver.py:159][0m Model with maximum 'micro_auc' saved.
[32m[1103 18:02:00 @monitor.py:362][0m DataParallelInferenceRunner/QueueInput/queue_size: 49.295
[32m[1103 18:02:00 @monitor.py:362][0m QueueInput/queue_size: 49.738
[32m[1103 18:02:00 @monitor.py:362][0m coverage: 7.7005
[32m[1103 18:02:00 @monitor.py:362][0m learning_rate: 0.0001
[32m[1103 18:02:00 @monitor.py:362][0m loss/value: 0.18623
[32m[1103 18:02:00 @monitor.py:362][0m macro_auc: 0.89911
[32m[1103 18:02:00 @monitor.py:362][0m macro_f1: 0.32258
[32m[1103 18:02:00 @monitor.py:362][0m mean_average_precision: 0.5841
[32m[1103 18:02:00 @monitor.py:362][0m micro_auc: 0.75879
[32m[1103 18:02:00 @monitor.py:362][0m micro_f1: 0.32118
[32m[1103 18:02:00 @monitor.py:362][0m one_error: 0.74653
[32m[1103 18:02:00 @monitor.py:362][0m ranking_loss: 0.2849
[32m[1103 18:02:00 @monitor.py:362][0m ranking_mean

100%|##########|46/46[01:22<00:00, 0.57it/s]

[32m[1103 18:03:23 @base.py:267][0m Epoch 5 (global_step 230) finished, time:82.78 sec.



100%|##########|18/18[00:25<00:00, 0.70it/s]


[32m[1103 18:03:49 @saver.py:90][0m Model saved to ./train_log/all-stage/model-230.
[32m[1103 18:03:50 @saver.py:159][0m Model with maximum 'micro_auc' saved.
[32m[1103 18:03:50 @monitor.py:362][0m DataParallelInferenceRunner/QueueInput/queue_size: 49.743
[32m[1103 18:03:50 @monitor.py:362][0m QueueInput/queue_size: 49.934
[32m[1103 18:03:50 @monitor.py:362][0m coverage: 7.2257
[32m[1103 18:03:50 @monitor.py:362][0m learning_rate: 0.0001
[32m[1103 18:03:50 @monitor.py:362][0m loss/value: 0.1784
[32m[1103 18:03:50 @monitor.py:362][0m macro_auc: 0.90727
[32m[1103 18:03:50 @monitor.py:362][0m macro_f1: 0.32127
[32m[1103 18:03:50 @monitor.py:362][0m mean_average_precision: 0.59684
[32m[1103 18:03:50 @monitor.py:362][0m micro_auc: 0.78323
[32m[1103 18:03:50 @monitor.py:362][0m micro_f1: 0.32485
[32m[1103 18:03:50 @monitor.py:362][0m one_error: 0.67361
[32m[1103 18:03:50 @monitor.py:362][0m ranking_loss: 0.24686
[32m[1103 18:03:50 @monitor.py:362][0m ranking_mea

100%|##########|46/46[01:22<00:00, 0.50it/s]

[32m[1103 18:05:13 @base.py:267][0m Epoch 6 (global_step 276) finished, time:82.71 sec.



100%|##########|18/18[00:25<00:00, 0.73it/s]


[32m[1103 18:05:38 @saver.py:90][0m Model saved to ./train_log/all-stage/model-276.
[32m[1103 18:05:39 @saver.py:159][0m Model with maximum 'micro_auc' saved.
[32m[1103 18:05:39 @monitor.py:362][0m DataParallelInferenceRunner/QueueInput/queue_size: 49.556
[32m[1103 18:05:39 @monitor.py:362][0m QueueInput/queue_size: 49.972
[32m[1103 18:05:39 @monitor.py:362][0m coverage: 6.3056
[32m[1103 18:05:39 @monitor.py:362][0m learning_rate: 0.0001
[32m[1103 18:05:39 @monitor.py:362][0m loss/value: 0.16837
[32m[1103 18:05:39 @monitor.py:362][0m macro_auc: 0.91661
[32m[1103 18:05:39 @monitor.py:362][0m macro_f1: 0.35777
[32m[1103 18:05:39 @monitor.py:362][0m mean_average_precision: 0.63031
[32m[1103 18:05:39 @monitor.py:362][0m micro_auc: 0.83302
[32m[1103 18:05:39 @monitor.py:362][0m micro_f1: 0.35373
[32m[1103 18:05:39 @monitor.py:362][0m one_error: 0.52691
[32m[1103 18:05:39 @monitor.py:362][0m ranking_loss: 0.19311
[32m[1103 18:05:39 @monitor.py:362][0m ranking_me

100%|##########|46/46[01:23<00:00, 0.55it/s]

[32m[1103 18:07:02 @base.py:267][0m Epoch 7 (global_step 322) finished, time:83.09 sec.



100%|##########|18/18[00:24<00:00, 0.72it/s]


[32m[1103 18:07:27 @saver.py:90][0m Model saved to ./train_log/all-stage/model-322.
[32m[1103 18:07:28 @saver.py:159][0m Model with maximum 'micro_auc' saved.
[32m[1103 18:07:28 @monitor.py:362][0m DataParallelInferenceRunner/QueueInput/queue_size: 49.319
[32m[1103 18:07:28 @monitor.py:362][0m QueueInput/queue_size: 49.98
[32m[1103 18:07:28 @monitor.py:362][0m coverage: 5.5642
[32m[1103 18:07:28 @monitor.py:362][0m learning_rate: 0.0001
[32m[1103 18:07:28 @monitor.py:362][0m loss/value: 0.1598
[32m[1103 18:07:28 @monitor.py:362][0m macro_auc: 0.92277
[32m[1103 18:07:28 @monitor.py:362][0m macro_f1: 0.38321
[32m[1103 18:07:28 @monitor.py:362][0m mean_average_precision: 0.64578
[32m[1103 18:07:28 @monitor.py:362][0m micro_auc: 0.85159
[32m[1103 18:07:28 @monitor.py:362][0m micro_f1: 0.37056
[32m[1103 18:07:28 @monitor.py:362][0m one_error: 0.57899
[32m[1103 18:07:28 @monitor.py:362][0m ranking_loss: 0.17246
[32m[1103 18:07:28 @monitor.py:362][0m ranking_mean

100%|##########|46/46[01:24<00:00, 0.53it/s]

[32m[1103 18:08:52 @base.py:267][0m Epoch 8 (global_step 368) finished, time:84.12 sec.



100%|##########|18/18[00:25<00:00, 0.71it/s]


[32m[1103 18:09:18 @saver.py:90][0m Model saved to ./train_log/all-stage/model-368.
[32m[1103 18:09:19 @saver.py:159][0m Model with maximum 'micro_auc' saved.
[32m[1103 18:09:19 @monitor.py:362][0m DataParallelInferenceRunner/QueueInput/queue_size: 49.743
[32m[1103 18:09:19 @monitor.py:362][0m QueueInput/queue_size: 49.998
[32m[1103 18:09:19 @monitor.py:362][0m coverage: 4.7691
[32m[1103 18:09:19 @monitor.py:362][0m learning_rate: 0.0001
[32m[1103 18:09:19 @monitor.py:362][0m loss/value: 0.15577
[32m[1103 18:09:19 @monitor.py:362][0m macro_auc: 0.92486
[32m[1103 18:09:19 @monitor.py:362][0m macro_f1: 0.37694
[32m[1103 18:09:19 @monitor.py:362][0m mean_average_precision: 0.65093
[32m[1103 18:09:19 @monitor.py:362][0m micro_auc: 0.88235
[32m[1103 18:09:19 @monitor.py:362][0m micro_f1: 0.36985
[32m[1103 18:09:19 @monitor.py:362][0m one_error: 0.51562
[32m[1103 18:09:19 @monitor.py:362][0m ranking_loss: 0.13721
[32m[1103 18:09:19 @monitor.py:362][0m ranking_me

100%|##########|46/46[01:23<00:00, 0.54it/s]

[32m[1103 18:10:42 @base.py:267][0m Epoch 9 (global_step 414) finished, time:83.23 sec.



100%|##########|18/18[00:25<00:00, 0.73it/s]


[32m[1103 18:11:07 @saver.py:90][0m Model saved to ./train_log/all-stage/model-414.
[32m[1103 18:11:07 @saver.py:159][0m Model with maximum 'micro_auc' saved.
[32m[1103 18:11:07 @monitor.py:362][0m DataParallelInferenceRunner/QueueInput/queue_size: 49.577
[32m[1103 18:11:07 @monitor.py:362][0m QueueInput/queue_size: 50
[32m[1103 18:11:07 @monitor.py:362][0m coverage: 4.5174
[32m[1103 18:11:07 @monitor.py:362][0m learning_rate: 0.0001
[32m[1103 18:11:07 @monitor.py:362][0m loss/value: 0.15319
[32m[1103 18:11:07 @monitor.py:362][0m macro_auc: 0.92637
[32m[1103 18:11:07 @monitor.py:362][0m macro_f1: 0.37852
[32m[1103 18:11:07 @monitor.py:362][0m mean_average_precision: 0.6605
[32m[1103 18:11:07 @monitor.py:362][0m micro_auc: 0.88705
[32m[1103 18:11:07 @monitor.py:362][0m micro_f1: 0.37556
[32m[1103 18:11:07 @monitor.py:362][0m one_error: 0.54167
[32m[1103 18:11:07 @monitor.py:362][0m ranking_loss: 0.13298
[32m[1103 18:11:07 @monitor.py:362][0m ranking_mean_av

100%|##########|46/46[01:22<00:00, 0.58it/s]

[32m[1103 18:12:30 @base.py:267][0m Epoch 10 (global_step 460) finished, time:82.86 sec.



100%|##########|18/18[00:24<00:00, 0.71it/s]


[32m[1103 18:12:55 @saver.py:90][0m Model saved to ./train_log/all-stage/model-460.
[32m[1103 18:12:56 @saver.py:159][0m Model with maximum 'micro_auc' saved.
[32m[1103 18:12:56 @monitor.py:362][0m DataParallelInferenceRunner/QueueInput/queue_size: 49.597
[32m[1103 18:12:56 @monitor.py:362][0m QueueInput/queue_size: 49.957
[32m[1103 18:12:56 @monitor.py:362][0m coverage: 4.5286
[32m[1103 18:12:56 @monitor.py:362][0m learning_rate: 0.0001
[32m[1103 18:12:56 @monitor.py:362][0m loss/value: 0.1454
[32m[1103 18:12:56 @monitor.py:362][0m macro_auc: 0.92995
[32m[1103 18:12:56 @monitor.py:362][0m macro_f1: 0.37014
[32m[1103 18:12:56 @monitor.py:362][0m mean_average_precision: 0.66919
[32m[1103 18:12:56 @monitor.py:362][0m micro_auc: 0.89019
[32m[1103 18:12:56 @monitor.py:362][0m micro_f1: 0.37239
[32m[1103 18:12:56 @monitor.py:362][0m one_error: 0.51823
[32m[1103 18:12:56 @monitor.py:362][0m ranking_loss: 0.12608
[32m[1103 18:12:56 @monitor.py:362][0m ranking_mea

100%|##########|46/46[01:23<00:00, 0.55it/s]

[32m[1103 18:14:19 @base.py:267][0m Epoch 11 (global_step 506) finished, time:83.04 sec.



100%|##########|18/18[00:25<00:00, 0.70it/s]


[32m[1103 18:14:45 @saver.py:90][0m Model saved to ./train_log/all-stage/model-506.
[32m[1103 18:14:46 @saver.py:159][0m Model with maximum 'micro_auc' saved.
[32m[1103 18:14:46 @monitor.py:362][0m DataParallelInferenceRunner/QueueInput/queue_size: 49.848
[32m[1103 18:14:46 @monitor.py:362][0m QueueInput/queue_size: 49.996
[32m[1103 18:14:46 @monitor.py:362][0m coverage: 3.8594
[32m[1103 18:14:46 @monitor.py:362][0m learning_rate: 0.0001
[32m[1103 18:14:46 @monitor.py:362][0m loss/value: 0.14381
[32m[1103 18:14:46 @monitor.py:362][0m macro_auc: 0.93284
[32m[1103 18:14:46 @monitor.py:362][0m macro_f1: 0.35606
[32m[1103 18:14:46 @monitor.py:362][0m mean_average_precision: 0.6903
[32m[1103 18:14:46 @monitor.py:362][0m micro_auc: 0.91431
[32m[1103 18:14:46 @monitor.py:362][0m micro_f1: 0.3649
[32m[1103 18:14:46 @monitor.py:362][0m one_error: 0.43663
[32m[1103 18:14:46 @monitor.py:362][0m ranking_loss: 0.098734
[32m[1103 18:14:46 @monitor.py:362][0m ranking_mea

100%|##########|46/46[01:23<00:00, 0.55it/s]

[32m[1103 18:16:09 @base.py:267][0m Epoch 12 (global_step 552) finished, time:83.13 sec.



100%|##########|18/18[00:25<00:00, 0.73it/s]


[32m[1103 18:16:35 @saver.py:90][0m Model saved to ./train_log/all-stage/model-552.
[32m[1103 18:16:35 @saver.py:159][0m Model with maximum 'micro_auc' saved.
[32m[1103 18:16:35 @monitor.py:362][0m DataParallelInferenceRunner/QueueInput/queue_size: 49.7
[32m[1103 18:16:35 @monitor.py:362][0m QueueInput/queue_size: 49.941
[32m[1103 18:16:35 @monitor.py:362][0m coverage: 3.5547
[32m[1103 18:16:35 @monitor.py:362][0m learning_rate: 0.0001
[32m[1103 18:16:35 @monitor.py:362][0m loss/value: 0.13905
[32m[1103 18:16:35 @monitor.py:362][0m macro_auc: 0.93437
[32m[1103 18:16:35 @monitor.py:362][0m macro_f1: 0.3663
[32m[1103 18:16:35 @monitor.py:362][0m mean_average_precision: 0.69596
[32m[1103 18:16:35 @monitor.py:362][0m micro_auc: 0.92326
[32m[1103 18:16:35 @monitor.py:362][0m micro_f1: 0.37712
[32m[1103 18:16:35 @monitor.py:362][0m one_error: 0.37153
[32m[1103 18:16:35 @monitor.py:362][0m ranking_loss: 0.087192
[32m[1103 18:16:35 @monitor.py:362][0m ranking_mean

100%|##########|46/46[01:22<00:00, 0.55it/s]

[32m[1103 18:17:58 @base.py:267][0m Epoch 13 (global_step 598) finished, time:82.33 sec.



100%|##########|18/18[00:24<00:00, 0.71it/s]


[32m[1103 18:18:23 @saver.py:90][0m Model saved to ./train_log/all-stage/model-598.
[32m[1103 18:18:23 @monitor.py:362][0m DataParallelInferenceRunner/QueueInput/queue_size: 49.78
[32m[1103 18:18:23 @monitor.py:362][0m QueueInput/queue_size: 49.981
[32m[1103 18:18:23 @monitor.py:362][0m coverage: 4.099
[32m[1103 18:18:23 @monitor.py:362][0m learning_rate: 0.0001
[32m[1103 18:18:23 @monitor.py:362][0m loss/value: 0.13394
[32m[1103 18:18:23 @monitor.py:362][0m macro_auc: 0.93744
[32m[1103 18:18:23 @monitor.py:362][0m macro_f1: 0.39461
[32m[1103 18:18:23 @monitor.py:362][0m mean_average_precision: 0.69978
[32m[1103 18:18:23 @monitor.py:362][0m micro_auc: 0.91228
[32m[1103 18:18:23 @monitor.py:362][0m micro_f1: 0.40104
[32m[1103 18:18:23 @monitor.py:362][0m one_error: 0.37587
[32m[1103 18:18:23 @monitor.py:362][0m ranking_loss: 0.10248
[32m[1103 18:18:23 @monitor.py:362][0m ranking_mean_average_precision: 0.67894
[32m[1103 18:18:23 @monitor.py:362][0m training

100%|##########|46/46[01:22<00:00, 0.54it/s]

[32m[1103 18:19:45 @base.py:267][0m Epoch 14 (global_step 644) finished, time:82.22 sec.



100%|##########|18/18[00:25<00:00, 0.72it/s]


[32m[1103 18:20:11 @saver.py:90][0m Model saved to ./train_log/all-stage/model-644.
[32m[1103 18:20:12 @saver.py:159][0m Model with maximum 'micro_auc' saved.
[32m[1103 18:20:12 @monitor.py:362][0m DataParallelInferenceRunner/QueueInput/queue_size: 49.917
[32m[1103 18:20:12 @monitor.py:362][0m QueueInput/queue_size: 49.96
[32m[1103 18:20:12 @monitor.py:362][0m coverage: 3.362
[32m[1103 18:20:12 @monitor.py:362][0m learning_rate: 0.0001
[32m[1103 18:20:12 @monitor.py:362][0m loss/value: 0.12315
[32m[1103 18:20:12 @monitor.py:362][0m macro_auc: 0.9361
[32m[1103 18:20:12 @monitor.py:362][0m macro_f1: 0.4
[32m[1103 18:20:12 @monitor.py:362][0m mean_average_precision: 0.70137
[32m[1103 18:20:12 @monitor.py:362][0m micro_auc: 0.93219
[32m[1103 18:20:12 @monitor.py:362][0m micro_f1: 0.41592
[32m[1103 18:20:12 @monitor.py:362][0m one_error: 0.32726
[32m[1103 18:20:12 @monitor.py:362][0m ranking_loss: 0.074664
[32m[1103 18:20:12 @monitor.py:362][0m ranking_mean_ave

100%|##########|46/46[01:21<00:00, 0.58it/s]

[32m[1103 18:21:34 @base.py:267][0m Epoch 15 (global_step 690) finished, time:81.86 sec.
[32m[1103 18:21:34 @param.py:144][0m After epoch 15, learning_rate will change to 0.00010000



100%|##########|18/18[00:24<00:00, 0.75it/s]


[32m[1103 18:21:59 @saver.py:90][0m Model saved to ./train_log/all-stage/model-690.
[32m[1103 18:21:59 @saver.py:159][0m Model with maximum 'micro_auc' saved.
[32m[1103 18:21:59 @monitor.py:362][0m DataParallelInferenceRunner/QueueInput/queue_size: 49.671
[32m[1103 18:21:59 @monitor.py:362][0m QueueInput/queue_size: 49.996
[32m[1103 18:21:59 @monitor.py:362][0m coverage: 3.2448
[32m[1103 18:21:59 @monitor.py:362][0m learning_rate: 0.0001
[32m[1103 18:21:59 @monitor.py:362][0m loss/value: 0.12909
[32m[1103 18:21:59 @monitor.py:362][0m macro_auc: 0.94067
[32m[1103 18:21:59 @monitor.py:362][0m macro_f1: 0.36437
[32m[1103 18:21:59 @monitor.py:362][0m mean_average_precision: 0.71288
[32m[1103 18:21:59 @monitor.py:362][0m micro_auc: 0.93444
[32m[1103 18:21:59 @monitor.py:362][0m micro_f1: 0.38464
[32m[1103 18:21:59 @monitor.py:362][0m one_error: 0.35677
[32m[1103 18:21:59 @monitor.py:362][0m ranking_loss: 0.073931
[32m[1103 18:21:59 @monitor.py:362][0m ranking_m

100%|##########|46/46[01:22<00:00, 0.56it/s]

[32m[1103 18:23:22 @base.py:267][0m Epoch 16 (global_step 736) finished, time:82.52 sec.



100%|##########|18/18[00:24<00:00, 0.70it/s]


[32m[1103 18:23:47 @saver.py:90][0m Model saved to ./train_log/all-stage/model-736.
[32m[1103 18:23:48 @saver.py:159][0m Model with maximum 'micro_auc' saved.
[32m[1103 18:23:48 @monitor.py:362][0m DataParallelInferenceRunner/QueueInput/queue_size: 49.757
[32m[1103 18:23:48 @monitor.py:362][0m QueueInput/queue_size: 50
[32m[1103 18:23:48 @monitor.py:362][0m coverage: 3.0877
[32m[1103 18:23:48 @monitor.py:362][0m learning_rate: 0.0001
[32m[1103 18:23:48 @monitor.py:362][0m loss/value: 0.12412
[32m[1103 18:23:48 @monitor.py:362][0m macro_auc: 0.94263
[32m[1103 18:23:48 @monitor.py:362][0m macro_f1: 0.39197
[32m[1103 18:23:48 @monitor.py:362][0m mean_average_precision: 0.72445
[32m[1103 18:23:48 @monitor.py:362][0m micro_auc: 0.9391
[32m[1103 18:23:48 @monitor.py:362][0m micro_f1: 0.41386
[32m[1103 18:23:48 @monitor.py:362][0m one_error: 0.32205
[32m[1103 18:23:48 @monitor.py:362][0m ranking_loss: 0.066613
[32m[1103 18:23:48 @monitor.py:362][0m ranking_mean_a

100%|##########|46/46[01:22<00:00, 0.55it/s]

[32m[1103 18:25:10 @base.py:267][0m Epoch 17 (global_step 782) finished, time:82.47 sec.



100%|##########|18/18[00:25<00:00, 0.71it/s]


[32m[1103 18:25:36 @saver.py:90][0m Model saved to ./train_log/all-stage/model-782.
[32m[1103 18:25:37 @saver.py:159][0m Model with maximum 'micro_auc' saved.
[32m[1103 18:25:37 @monitor.py:362][0m DataParallelInferenceRunner/QueueInput/queue_size: 49.908
[32m[1103 18:25:37 @monitor.py:362][0m QueueInput/queue_size: 49.974
[32m[1103 18:25:37 @monitor.py:362][0m coverage: 2.9531
[32m[1103 18:25:37 @monitor.py:362][0m learning_rate: 0.0001
[32m[1103 18:25:37 @monitor.py:362][0m loss/value: 0.12171
[32m[1103 18:25:37 @monitor.py:362][0m macro_auc: 0.94343
[32m[1103 18:25:37 @monitor.py:362][0m macro_f1: 0.39194
[32m[1103 18:25:37 @monitor.py:362][0m mean_average_precision: 0.72873
[32m[1103 18:25:37 @monitor.py:362][0m micro_auc: 0.94326
[32m[1103 18:25:37 @monitor.py:362][0m micro_f1: 0.40806
[32m[1103 18:25:37 @monitor.py:362][0m one_error: 0.31424
[32m[1103 18:25:37 @monitor.py:362][0m ranking_loss: 0.062294
[32m[1103 18:25:37 @monitor.py:362][0m ranking_m

100%|##########|46/46[01:22<00:00, 0.57it/s]

[32m[1103 18:26:59 @base.py:267][0m Epoch 18 (global_step 828) finished, time:82.15 sec.



100%|##########|18/18[00:25<00:00, 0.71it/s]


[32m[1103 18:27:24 @saver.py:90][0m Model saved to ./train_log/all-stage/model-828.
[32m[1103 18:27:24 @monitor.py:362][0m DataParallelInferenceRunner/QueueInput/queue_size: 49.531
[32m[1103 18:27:24 @monitor.py:362][0m QueueInput/queue_size: 49.992
[32m[1103 18:27:24 @monitor.py:362][0m coverage: 3.0512
[32m[1103 18:27:24 @monitor.py:362][0m learning_rate: 0.0001
[32m[1103 18:27:24 @monitor.py:362][0m loss/value: 0.11868
[32m[1103 18:27:24 @monitor.py:362][0m macro_auc: 0.94367
[32m[1103 18:27:24 @monitor.py:362][0m macro_f1: 0.42422
[32m[1103 18:27:24 @monitor.py:362][0m mean_average_precision: 0.72568
[32m[1103 18:27:24 @monitor.py:362][0m micro_auc: 0.94182
[32m[1103 18:27:24 @monitor.py:362][0m micro_f1: 0.43785
[32m[1103 18:27:24 @monitor.py:362][0m one_error: 0.29514
[32m[1103 18:27:24 @monitor.py:362][0m ranking_loss: 0.063406
[32m[1103 18:27:24 @monitor.py:362][0m ranking_mean_average_precision: 0.77073
[32m[1103 18:27:24 @monitor.py:362][0m train

100%|##########|46/46[01:22<00:00, 0.56it/s]

[32m[1103 18:28:47 @base.py:267][0m Epoch 19 (global_step 874) finished, time:82.62 sec.



100%|##########|18/18[00:24<00:00, 0.72it/s]


[32m[1103 18:29:12 @saver.py:90][0m Model saved to ./train_log/all-stage/model-874.
[32m[1103 18:29:12 @monitor.py:362][0m DataParallelInferenceRunner/QueueInput/queue_size: 49.802
[32m[1103 18:29:12 @monitor.py:362][0m QueueInput/queue_size: 49.991
[32m[1103 18:29:12 @monitor.py:362][0m coverage: 3.3186
[32m[1103 18:29:12 @monitor.py:362][0m learning_rate: 0.0001
[32m[1103 18:29:12 @monitor.py:362][0m loss/value: 0.1119
[32m[1103 18:29:12 @monitor.py:362][0m macro_auc: 0.94398
[32m[1103 18:29:12 @monitor.py:362][0m macro_f1: 0.41422
[32m[1103 18:29:12 @monitor.py:362][0m mean_average_precision: 0.73441
[32m[1103 18:29:12 @monitor.py:362][0m micro_auc: 0.93434
[32m[1103 18:29:12 @monitor.py:362][0m micro_f1: 0.43301
[32m[1103 18:29:12 @monitor.py:362][0m one_error: 0.34809
[32m[1103 18:29:12 @monitor.py:362][0m ranking_loss: 0.075928
[32m[1103 18:29:12 @monitor.py:362][0m ranking_mean_average_precision: 0.7335
[32m[1103 18:29:12 @monitor.py:362][0m trainin

100%|##########|46/46[01:22<00:00, 0.55it/s]

[32m[1103 18:30:35 @base.py:267][0m Epoch 20 (global_step 920) finished, time:82.65 sec.



100%|##########|18/18[00:25<00:00, 0.72it/s]


[32m[1103 18:31:00 @saver.py:90][0m Model saved to ./train_log/all-stage/model-920.
[32m[1103 18:31:01 @saver.py:159][0m Model with maximum 'micro_auc' saved.
[32m[1103 18:31:01 @monitor.py:362][0m DataParallelInferenceRunner/QueueInput/queue_size: 49.925
[32m[1103 18:31:01 @monitor.py:362][0m QueueInput/queue_size: 49.989
[32m[1103 18:31:01 @monitor.py:362][0m coverage: 2.8299
[32m[1103 18:31:01 @monitor.py:362][0m learning_rate: 0.0001
[32m[1103 18:31:01 @monitor.py:362][0m loss/value: 0.10828
[32m[1103 18:31:01 @monitor.py:362][0m macro_auc: 0.94551
[32m[1103 18:31:01 @monitor.py:362][0m macro_f1: 0.44297
[32m[1103 18:31:01 @monitor.py:362][0m mean_average_precision: 0.73553
[32m[1103 18:31:01 @monitor.py:362][0m micro_auc: 0.9468
[32m[1103 18:31:01 @monitor.py:362][0m micro_f1: 0.45549
[32m[1103 18:31:01 @monitor.py:362][0m one_error: 0.28993
[32m[1103 18:31:01 @monitor.py:362][0m ranking_loss: 0.057721
[32m[1103 18:31:01 @monitor.py:362][0m ranking_me

100%|##########|46/46[01:22<00:00, 0.54it/s]

[32m[1103 18:32:24 @base.py:267][0m Epoch 21 (global_step 966) finished, time:82.62 sec.



100%|##########|18/18[00:24<00:00, 0.74it/s]


[32m[1103 18:32:49 @saver.py:90][0m Model saved to ./train_log/all-stage/model-966.
[32m[1103 18:32:49 @monitor.py:362][0m DataParallelInferenceRunner/QueueInput/queue_size: 49.542
[32m[1103 18:32:49 @monitor.py:362][0m QueueInput/queue_size: 49.999
[32m[1103 18:32:49 @monitor.py:362][0m coverage: 2.9566
[32m[1103 18:32:49 @monitor.py:362][0m learning_rate: 0.0001
[32m[1103 18:32:49 @monitor.py:362][0m loss/value: 0.10964
[32m[1103 18:32:49 @monitor.py:362][0m macro_auc: 0.94399
[32m[1103 18:32:49 @monitor.py:362][0m macro_f1: 0.45075
[32m[1103 18:32:49 @monitor.py:362][0m mean_average_precision: 0.72654
[32m[1103 18:32:49 @monitor.py:362][0m micro_auc: 0.94365
[32m[1103 18:32:49 @monitor.py:362][0m micro_f1: 0.46634
[32m[1103 18:32:49 @monitor.py:362][0m one_error: 0.30556
[32m[1103 18:32:49 @monitor.py:362][0m ranking_loss: 0.062439
[32m[1103 18:32:49 @monitor.py:362][0m ranking_mean_average_precision: 0.7709
[32m[1103 18:32:49 @monitor.py:362][0m traini

100%|##########|46/46[01:22<00:00, 0.55it/s]

[32m[1103 18:34:12 @base.py:267][0m Epoch 22 (global_step 1012) finished, time:82.52 sec.



100%|##########|18/18[00:25<00:00, 0.71it/s]


[32m[1103 18:34:37 @saver.py:90][0m Model saved to ./train_log/all-stage/model-1012.
[32m[1103 18:34:37 @monitor.py:362][0m DataParallelInferenceRunner/QueueInput/queue_size: 49.827
[32m[1103 18:34:37 @monitor.py:362][0m QueueInput/queue_size: 50
[32m[1103 18:34:37 @monitor.py:362][0m coverage: 3.3628
[32m[1103 18:34:37 @monitor.py:362][0m learning_rate: 0.0001
[32m[1103 18:34:37 @monitor.py:362][0m loss/value: 0.10494
[32m[1103 18:34:37 @monitor.py:362][0m macro_auc: 0.94593
[32m[1103 18:34:37 @monitor.py:362][0m macro_f1: 0.47601
[32m[1103 18:34:37 @monitor.py:362][0m mean_average_precision: 0.74381
[32m[1103 18:34:37 @monitor.py:362][0m micro_auc: 0.92501
[32m[1103 18:34:37 @monitor.py:362][0m micro_f1: 0.47503
[32m[1103 18:34:37 @monitor.py:362][0m one_error: 0.3724
[32m[1103 18:34:37 @monitor.py:362][0m ranking_loss: 0.081982
[32m[1103 18:34:37 @monitor.py:362][0m ranking_mean_average_precision: 0.72581
[32m[1103 18:34:37 @monitor.py:362][0m training_

In [14]:
# Display the contents of the GLOBAL_VARIABLES collection as a sanity check
# after training. The full list is long; slice it before sharing the notebook.
tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)

[<tf.Variable 'EMA/QueueInput/queue_size:0' shape=() dtype=float32_ref>,
 <tf.Variable 'EMA/QueueInput/queue_size/biased:0' shape=() dtype=float32_ref>,
 <tf.Variable 'EMA/QueueInput/queue_size/local_step:0' shape=() dtype=float32_ref>,
 <tf.Variable 'resnet_v2_101/conv1/weights:0' shape=(7, 7, 3, 64) dtype=float32_ref>,
 <tf.Variable 'resnet_v2_101/conv1/biases:0' shape=(64,) dtype=float32_ref>,
 <tf.Variable 'resnet_v2_101/block1/unit_1/bottleneck_v2/preact/beta:0' shape=(64,) dtype=float32_ref>,
 <tf.Variable 'resnet_v2_101/block1/unit_1/bottleneck_v2/preact/gamma:0' shape=(64,) dtype=float32_ref>,
 <tf.Variable 'resnet_v2_101/block1/unit_1/bottleneck_v2/preact/moving_mean:0' shape=(64,) dtype=float32_ref>,
 <tf.Variable 'resnet_v2_101/block1/unit_1/bottleneck_v2/preact/moving_variance:0' shape=(64,) dtype=float32_ref>,
 <tf.Variable 'resnet_v2_101/block1/unit_1/bottleneck_v2/shortcut/weights:0' shape=(1, 1, 64, 256) dtype=float32_ref>,
 <tf.Variable 'resnet_v2_101/block1/unit_1/bot